2 ;; Copyright (C) 2009-2024 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Iterator for comparison types
22 (define_code_iterator CMP_TEST [eq lt gt unordered])
24 ;; Mode attribute for vector floate and floato conversions
25 (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
27 ;; Iterator for both scalar and vector floating point types supported by VSX
28 (define_mode_iterator VSX_B [DF V4SF V2DF])
30 ;; Iterator for the 2 64-bit vector types
31 (define_mode_iterator VSX_D [V2DF V2DI])
33 ;; Mode iterator to handle swapping words on little endian for the 128-bit
34 ;; types that go in a single vector register.
35 (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
36 (TF "FLOAT128_VECTOR_P (TFmode)")
40 ;; Iterator for the 2 32-bit vector types
41 (define_mode_iterator VSX_W [V4SF V4SI])
43 ;; Iterator for the DF types
44 (define_mode_iterator VSX_DF [V2DF DF])
46 ;; Iterator for vector floating point types supported by VSX
47 (define_mode_iterator VSX_F [V4SF V2DF])
49 ;; Iterator for logical types supported by VSX
50 (define_mode_iterator VSX_L [V16QI
58 (KF "FLOAT128_VECTOR_P (KFmode)")
59 (TF "FLOAT128_VECTOR_P (TFmode)")])
61 ;; Iterator for memory moves.
62 (define_mode_iterator VSX_M [V16QI
69 (KF "FLOAT128_VECTOR_P (KFmode)")
70 (TF "FLOAT128_VECTOR_P (TFmode)")
73 (define_mode_attr VSX_XXBR [(V8HI "h")
80 ;; Map into the appropriate load/store name based on the type
81 (define_mode_attr VSm [(V16QI "vw4")
93 ;; Map the register class used
94 (define_mode_attr VSr [(V16QI "v")
108 ;; What value we need in the "isa" field, to make the IEEE QP float work.
109 (define_mode_attr VSisa [(V16QI "*")
123 ;; A mode attribute to disparage use of GPR registers, except for scalar integer modes.
125 (define_mode_attr ??r [(V16QI "??r")
136 ;; A mode attribute used for 128-bit constant values.
137 (define_mode_attr nW [(V16QI "W")
148 ;; Same size integer type for floating point data
149 (define_mode_attr VSi [(V4SF "v4si")
153 (define_mode_attr VSI [(V4SF "V4SI")
157 ;; Word size for same size conversion
158 (define_mode_attr VSc [(V4SF "w")
162 ;; Map into either s or v, depending on whether this is a scalar or vector operation.
164 (define_mode_attr VSv [(V16QI "v")
174 ;; Appropriate type for add ops (and other simple FP ops)
175 (define_mode_attr VStype_simple [(V2DF "vecdouble")
179 ;; Appropriate type for multiply ops
180 (define_mode_attr VStype_mul [(V2DF "vecdouble")
184 ;; Appropriate type for divide ops.
185 (define_mode_attr VStype_div [(V2DF "vecdiv")
189 ;; Map to a double-sized vector mode
190 (define_mode_attr VS_double [(V4SI "V8SI")
196 ;; Iterators for loading constants with xxspltib
197 (define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
198 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
200 ;; Vector reverse byte modes
201 (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
203 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
204 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
205 ;; done on ISA 2.07 and not just ISA 3.0.
206 (define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
207 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
208 (define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI])
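;; Hedged illustration (example function and code generation are assumptions,
;; not taken from the testsuite): these iterators cover extracts such as
;;
;;   #include <altivec.h>
;;   unsigned char get7 (vector unsigned char v)
;;   {
;;     return vec_extract (v, 7);
;;   }
;;
;; On ISA 3.0 a byte or halfword element can be pulled out directly with the
;; new extract instructions plus a direct move, while on ISA 2.07 only the
;; word-sized (SI) element has such a direct path, which is why V4SI is left
;; out of VSX_EXTRACT_I2.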
210 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
214 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
215 ;; insert to validate the operand number.
216 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
217 (V8HI "const_0_to_7_operand")
218 (V4SI "const_0_to_3_operand")])
220 ;; Mode attribute to give the constraint for vector extract and insert
222 (define_mode_attr VSX_EX [(V16QI "v")
226 ;; Mode iterator for binary floating types other than double to
227 ;; optimize converting to that floating point type from an extract
228 ;; of an integer type
229 (define_mode_iterator VSX_EXTRACT_FL [SF
230 (IF "FLOAT128_2REG_P (IFmode)")
231 (KF "TARGET_FLOAT128_HW")
232 (TF "FLOAT128_2REG_P (TFmode)
233 || (FLOAT128_IEEE_P (TFmode)
234 && TARGET_FLOAT128_HW)")])
236 ;; Mode iterator for binary floating types that have a direct conversion
237 ;; from 64-bit integer to floating point
238 (define_mode_iterator FL_CONV [SF
240 (KF "TARGET_FLOAT128_HW")
241 (TF "TARGET_FLOAT128_HW
242 && FLOAT128_IEEE_P (TFmode)")])
244 ;; Iterator for the 2 short vector types to do a splat from an integer
245 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
247 ;; Mode attribute to give the count for the splat instruction to splat
248 ;; the value in the 64-bit integer slot
249 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
251 ;; Mode attribute to give the suffix for the splat instruction
252 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
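;; Hedged note on the two attributes above (the instruction sketch is
;; illustrative): after a direct move such as mtvsrd, the scalar sits in the
;; 64-bit slot of the vector register, with the byte of interest at BE index 7
;; (halfword index 3).  vspltb/vsplth with exactly that count then replicates
;; it across the vector, so
;;
;;   vector unsigned char splat_c (unsigned char c)
;;   {
;;     return vec_splats (c);
;;   }
;;
;; can be done without ever going through memory.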
254 ;; Iterator for the move to mask instructions
255 (define_mode_iterator VSX_MM [V16QI V8HI V4SI V2DI V1TI])
256 (define_mode_iterator VSX_MM4 [V16QI V8HI V4SI V2DI])
258 ;; Longer vec int modes for rotate/mask ops
259 ;; and Vector Integer Multiply/Divide/Modulo Instructions
260 (define_mode_iterator VIlong [V2DI V4SI])
262 ;; Constants for creating unspecs
263 (define_c_enum "unspec"
276 UNSPEC_VSX_UNS_FLOAT2
278 UNSPEC_VSX_UNS_FLOATE
280 UNSPEC_VSX_UNS_FLOATO
300 UNSPEC_VSX_SIGN_EXTEND
301 UNSPEC_VSX_XVCVBF16SPN
302 UNSPEC_VSX_XVCVSPBF16
303 UNSPEC_VSX_XVCVSPSXDS
314 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
315 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
347 UNSPEC_VSX_FIRST_MATCH_INDEX
348 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
349 UNSPEC_VSX_FIRST_MISMATCH_INDEX
350 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
374 (define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
375 UNSPEC_VSX_XVCVBF16SPN])
377 (define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
378 (UNSPEC_VSX_XVCVBF16SPN "xvcvbf16spn")])
380 ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
381 (define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
383 ;; Aligned and unaligned vector replace iterator/attr for 32-bit and 64-bit elements.
385 (define_mode_iterator REPLACE_ELT_V [V4SI V4SF V2DI V2DF])
386 (define_mode_iterator REPLACE_ELT [SI SF DI DF])
387 (define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w")
388 (V2DI "d") (V2DF "d")
391 (define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2")
392 (V2DI "3") (V2DF "3")])
393 (define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12")
394 (V2DI "8") (V2DF "8")])
396 ;; Like VM2 in altivec.md, just do char, short, int, long, float and double
397 (define_mode_iterator VM3 [V4SI
403 (define_mode_iterator V2DI_DI [V2DI DI])
404 (define_mode_attr DI_to_TI [(V2DI "V1TI")
407 (define_mode_attr VM3_char [(V2DI "d")
414 ;; Iterator and attribute for vector count leading/trailing
415 ;; zero least-significant bits byte
416 (define_int_iterator VCZLSBB [UNSPEC_VCLZLSBB
418 (define_int_attr vczlsbb_char [(UNSPEC_VCLZLSBB "l")
419 (UNSPEC_VCTZLSBB "t")])
423 ;; TImode memory to memory move optimization on LE with p8vector
424 (define_insn_and_split "*vsx_le_mem_to_mem_mov_ti"
425 [(set (match_operand:TI 0 "indexed_or_indirect_operand" "=Z")
426 (match_operand:TI 1 "indexed_or_indirect_operand" "Z"))]
430 && can_create_pseudo_p ()"
435 rtx tmp = gen_reg_rtx (V2DImode);
436 rtx src = adjust_address (operands[1], V2DImode, 0);
437 emit_insn (gen_vsx_ld_elemrev_v2di (tmp, src));
438 rtx dest = adjust_address (operands[0], V2DImode, 0);
439 emit_insn (gen_vsx_st_elemrev_v2di (dest, tmp));
442 [(set_attr "length" "16")])
444 ;; The patterns for LE permuted loads and stores come before the general
445 ;; VSX moves so they match first.
446 (define_insn_and_split "*vsx_le_perm_load_<mode>"
447 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
448 (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
449 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
455 (parallel [(const_int 1) (const_int 0)])))
459 (parallel [(const_int 1) (const_int 0)])))]
461 rtx mem = operands[1];
463 /* Don't apply the swap optimization if we've already performed register
464 allocation and the hard register destination is not in the altivec range.  */
466 if ((MEM_ALIGN (mem) >= 128)
467 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
468 || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
470 rtx mem_address = XEXP (mem, 0);
471 enum machine_mode mode = GET_MODE (mem);
473 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
475 /* Replace the source memory address with masked address. */
476 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
477 emit_insn (lvx_set_expr);
480 else if (rs6000_quadword_masked_address_p (mem_address))
482 /* This rtl is already in the form that matches lvx
483 instruction, so leave it alone. */
486 /* Otherwise, fall through to transform into a swapping load. */
488 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
491 [(set_attr "type" "vecload")
492 (set_attr "length" "8")])
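;; Hedged illustration of the choice made in the preparation code above
;; (register numbers are arbitrary): a V2DF load with unknown alignment on
;; little-endian power8 becomes the element-reversing pair
;;   lxvd2x 34,0,3
;;   xxpermdi 34,34,34,2
;; whereas a load known to be 16-byte aligned, whose destination can live in
;; an Altivec register, is rewritten to a single
;;   lvx 2,0,3
;; since lvx does not perform the doubleword reversal that lxvd2x does on
;; little endian and therefore needs no swap.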
494 (define_insn_and_split "*vsx_le_perm_load_<mode>"
495 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
496 (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
497 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
503 (parallel [(const_int 2) (const_int 3)
504 (const_int 0) (const_int 1)])))
508 (parallel [(const_int 2) (const_int 3)
509 (const_int 0) (const_int 1)])))]
511 rtx mem = operands[1];
513 /* Don't apply the swap optimization if we've already performed register
514 allocation and the hard register destination is not in the altivec range.  */
516 if ((MEM_ALIGN (mem) >= 128)
517 && (!HARD_REGISTER_P (operands[0])
518 || ALTIVEC_REGNO_P (REGNO(operands[0]))))
520 rtx mem_address = XEXP (mem, 0);
521 enum machine_mode mode = GET_MODE (mem);
523 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
525 /* Replace the source memory address with masked address. */
526 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
527 emit_insn (lvx_set_expr);
530 else if (rs6000_quadword_masked_address_p (mem_address))
532 /* This rtl is already in the form that matches lvx
533 instruction, so leave it alone. */
536 /* Otherwise, fall through to transform into a swapping load. */
538 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
541 [(set_attr "type" "vecload")
542 (set_attr "length" "8")])
544 (define_insn_and_split "*vsx_le_perm_load_v8hi"
545 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
546 (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
547 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
553 (parallel [(const_int 4) (const_int 5)
554 (const_int 6) (const_int 7)
555 (const_int 0) (const_int 1)
556 (const_int 2) (const_int 3)])))
560 (parallel [(const_int 4) (const_int 5)
561 (const_int 6) (const_int 7)
562 (const_int 0) (const_int 1)
563 (const_int 2) (const_int 3)])))]
565 rtx mem = operands[1];
567 /* Don't apply the swap optimization if we've already performed register
568 allocation and the hard register destination is not in the altivec range.  */
570 if ((MEM_ALIGN (mem) >= 128)
571 && (!HARD_REGISTER_P (operands[0])
572 || ALTIVEC_REGNO_P (REGNO(operands[0]))))
574 rtx mem_address = XEXP (mem, 0);
575 enum machine_mode mode = GET_MODE (mem);
577 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
579 /* Replace the source memory address with masked address. */
580 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
581 emit_insn (lvx_set_expr);
584 else if (rs6000_quadword_masked_address_p (mem_address))
586 /* This rtl is already in the form that matches lvx
587 instruction, so leave it alone. */
590 /* Otherwise, fall through to transform into a swapping load. */
592 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
595 [(set_attr "type" "vecload")
596 (set_attr "length" "8")])
598 (define_insn_and_split "*vsx_le_perm_load_v16qi"
599 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
600 (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
601 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
607 (parallel [(const_int 8) (const_int 9)
608 (const_int 10) (const_int 11)
609 (const_int 12) (const_int 13)
610 (const_int 14) (const_int 15)
611 (const_int 0) (const_int 1)
612 (const_int 2) (const_int 3)
613 (const_int 4) (const_int 5)
614 (const_int 6) (const_int 7)])))
618 (parallel [(const_int 8) (const_int 9)
619 (const_int 10) (const_int 11)
620 (const_int 12) (const_int 13)
621 (const_int 14) (const_int 15)
622 (const_int 0) (const_int 1)
623 (const_int 2) (const_int 3)
624 (const_int 4) (const_int 5)
625 (const_int 6) (const_int 7)])))]
627 rtx mem = operands[1];
629 /* Don't apply the swap optimization if we've already performed register
630 allocation and the hard register destination is not in the altivec range.  */
632 if ((MEM_ALIGN (mem) >= 128)
633 && (!HARD_REGISTER_P (operands[0])
634 || ALTIVEC_REGNO_P (REGNO(operands[0]))))
636 rtx mem_address = XEXP (mem, 0);
637 enum machine_mode mode = GET_MODE (mem);
639 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
641 /* Replace the source memory address with masked address. */
642 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
643 emit_insn (lvx_set_expr);
646 else if (rs6000_quadword_masked_address_p (mem_address))
648 /* This rtl is already in the form that matches lvx
649 instruction, so leave it alone. */
652 /* Otherwise, fall through to transform into a swapping load. */
654 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
657 [(set_attr "type" "vecload")
658 (set_attr "length" "8")])
660 (define_insn "*vsx_le_perm_store_<mode>"
661 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
662 (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
663 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
665 [(set_attr "type" "vecstore")
666 (set_attr "length" "12")])
669 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
670 (match_operand:VSX_D 1 "vsx_register_operand"))]
671 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
675 (parallel [(const_int 1) (const_int 0)])))
679 (parallel [(const_int 1) (const_int 0)])))]
681 rtx mem = operands[0];
683 /* Don't apply the swap optimization if we've already performed register
684 allocation and the hard register source is not in the altivec range. */
685 if ((MEM_ALIGN (mem) >= 128)
686 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
687 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
689 rtx mem_address = XEXP (mem, 0);
690 enum machine_mode mode = GET_MODE (mem);
691 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
693 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
694 emit_insn (stvx_set_expr);
697 else if (rs6000_quadword_masked_address_p (mem_address))
699 /* This rtl is already in the form that matches stvx instruction,
700 so leave it alone. */
703 /* Otherwise, fall through to transform into a swapping store. */
706 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
710 ;; The post-reload split requires that we re-permute the source
711 ;; register in case it is still live.
713 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
714 (match_operand:VSX_D 1 "vsx_register_operand"))]
715 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
719 (parallel [(const_int 1) (const_int 0)])))
723 (parallel [(const_int 1) (const_int 0)])))
727 (parallel [(const_int 1) (const_int 0)])))]
730 (define_insn "*vsx_le_perm_store_<mode>"
731 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
732 (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
733 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
735 [(set_attr "type" "vecstore")
736 (set_attr "length" "12")])
739 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
740 (match_operand:VSX_W 1 "vsx_register_operand"))]
741 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
745 (parallel [(const_int 2) (const_int 3)
746 (const_int 0) (const_int 1)])))
750 (parallel [(const_int 2) (const_int 3)
751 (const_int 0) (const_int 1)])))]
753 rtx mem = operands[0];
755 /* Don't apply the swap optimization if we've already performed register
756 allocation and the hard register source is not in the altivec range. */
757 if ((MEM_ALIGN (mem) >= 128)
758 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
759 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
761 rtx mem_address = XEXP (mem, 0);
762 enum machine_mode mode = GET_MODE (mem);
763 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
765 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
766 emit_insn (stvx_set_expr);
769 else if (rs6000_quadword_masked_address_p (mem_address))
771 /* This rtl is already in the form that matches stvx instruction,
772 so leave it alone. */
775 /* Otherwise, fall through to transform into a swapping store. */
778 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
782 ;; The post-reload split requires that we re-permute the source
783 ;; register in case it is still live.
785 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
786 (match_operand:VSX_W 1 "vsx_register_operand"))]
787 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
791 (parallel [(const_int 2) (const_int 3)
792 (const_int 0) (const_int 1)])))
796 (parallel [(const_int 2) (const_int 3)
797 (const_int 0) (const_int 1)])))
801 (parallel [(const_int 2) (const_int 3)
802 (const_int 0) (const_int 1)])))]
805 (define_insn "*vsx_le_perm_store_v8hi"
806 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
807 (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
808 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
810 [(set_attr "type" "vecstore")
811 (set_attr "length" "12")])
814 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
815 (match_operand:V8HI 1 "vsx_register_operand"))]
816 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
820 (parallel [(const_int 4) (const_int 5)
821 (const_int 6) (const_int 7)
822 (const_int 0) (const_int 1)
823 (const_int 2) (const_int 3)])))
827 (parallel [(const_int 4) (const_int 5)
828 (const_int 6) (const_int 7)
829 (const_int 0) (const_int 1)
830 (const_int 2) (const_int 3)])))]
832 rtx mem = operands[0];
834 /* Don't apply the swap optimization if we've already performed register
835 allocation and the hard register source is not in the altivec range. */
836 if ((MEM_ALIGN (mem) >= 128)
837 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
838 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
840 rtx mem_address = XEXP (mem, 0);
841 enum machine_mode mode = GET_MODE (mem);
842 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
844 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
845 emit_insn (stvx_set_expr);
848 else if (rs6000_quadword_masked_address_p (mem_address))
850 /* This rtl is already in the form that matches stvx instruction,
851 so leave it alone. */
854 /* Otherwise, fall through to transform into a swapping store. */
857 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
861 ;; The post-reload split requires that we re-permute the source
862 ;; register in case it is still live.
864 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
865 (match_operand:V8HI 1 "vsx_register_operand"))]
866 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
870 (parallel [(const_int 4) (const_int 5)
871 (const_int 6) (const_int 7)
872 (const_int 0) (const_int 1)
873 (const_int 2) (const_int 3)])))
877 (parallel [(const_int 4) (const_int 5)
878 (const_int 6) (const_int 7)
879 (const_int 0) (const_int 1)
880 (const_int 2) (const_int 3)])))
884 (parallel [(const_int 4) (const_int 5)
885 (const_int 6) (const_int 7)
886 (const_int 0) (const_int 1)
887 (const_int 2) (const_int 3)])))]
890 (define_insn "*vsx_le_perm_store_v16qi"
891 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
892 (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
893 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
895 [(set_attr "type" "vecstore")
896 (set_attr "length" "12")])
899 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
900 (match_operand:V16QI 1 "vsx_register_operand"))]
901 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
905 (parallel [(const_int 8) (const_int 9)
906 (const_int 10) (const_int 11)
907 (const_int 12) (const_int 13)
908 (const_int 14) (const_int 15)
909 (const_int 0) (const_int 1)
910 (const_int 2) (const_int 3)
911 (const_int 4) (const_int 5)
912 (const_int 6) (const_int 7)])))
916 (parallel [(const_int 8) (const_int 9)
917 (const_int 10) (const_int 11)
918 (const_int 12) (const_int 13)
919 (const_int 14) (const_int 15)
920 (const_int 0) (const_int 1)
921 (const_int 2) (const_int 3)
922 (const_int 4) (const_int 5)
923 (const_int 6) (const_int 7)])))]
925 rtx mem = operands[0];
927 /* Don't apply the swap optimization if we've already performed register
928 allocation and the hard register source is not in the altivec range. */
929 if ((MEM_ALIGN (mem) >= 128)
930 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
931 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
933 rtx mem_address = XEXP (mem, 0);
934 enum machine_mode mode = GET_MODE (mem);
935 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
937 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
938 emit_insn (stvx_set_expr);
941 else if (rs6000_quadword_masked_address_p (mem_address))
943 /* This rtl is already in the form that matches stvx instruction,
944 so leave it alone. */
947 /* Otherwise, fall through to transform into a swapping store. */
950 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
954 ;; The post-reload split requires that we re-permute the source
955 ;; register in case it is still live.
957 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
958 (match_operand:V16QI 1 "vsx_register_operand"))]
959 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
963 (parallel [(const_int 8) (const_int 9)
964 (const_int 10) (const_int 11)
965 (const_int 12) (const_int 13)
966 (const_int 14) (const_int 15)
967 (const_int 0) (const_int 1)
968 (const_int 2) (const_int 3)
969 (const_int 4) (const_int 5)
970 (const_int 6) (const_int 7)])))
974 (parallel [(const_int 8) (const_int 9)
975 (const_int 10) (const_int 11)
976 (const_int 12) (const_int 13)
977 (const_int 14) (const_int 15)
978 (const_int 0) (const_int 1)
979 (const_int 2) (const_int 3)
980 (const_int 4) (const_int 5)
981 (const_int 6) (const_int 7)])))
985 (parallel [(const_int 8) (const_int 9)
986 (const_int 10) (const_int 11)
987 (const_int 12) (const_int 13)
988 (const_int 14) (const_int 15)
989 (const_int 0) (const_int 1)
990 (const_int 2) (const_int 3)
991 (const_int 4) (const_int 5)
992 (const_int 6) (const_int 7)])))]
995 ;; Little endian word swapping for 128-bit types that are either scalars or the
996 ;; special V1TI container class, for which it is not appropriate to use vec_select.
998 (define_insn "*vsx_le_permute_<mode>"
999 [(set (match_operand:VEC_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
1001 (match_operand:VEC_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
1003 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1005 xxpermdi %x0,%x1,%x1,2
1008 mr %0,%L1\;mr %L0,%1
1009 ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
1010 std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
1011 [(set_attr "length" "*,*,*,8,8,8")
1012 (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
1014 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
1015 [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=wa,wa")
1018 (match_operand:VEC_TI 1 "vsx_register_operand" "0,wa")
1021 "!BYTES_BIG_ENDIAN && TARGET_VSX"
1026 [(set (match_dup 0) (match_dup 1))]
1028 if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
1030 emit_note (NOTE_INSN_DELETED);
1034 [(set_attr "length" "0,4")
1035 (set_attr "type" "veclogical")])
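;; Hedged note: the insn above exists because two successive doubleword
;; rotates by 64 bits are the identity, so a leftover pair such as
;;   xxpermdi 0,0,0,2
;;   xxpermdi 0,0,0,2
;; produced by composing the LE permute patterns collapses into nothing (or
;; into a single register copy when the source and destination differ).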
1037 (define_insn_and_split "*vsx_le_perm_load_<mode>"
1038 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
1039 (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
1040 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1041 && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
1045 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1046 && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
1049 rtx tmp = (can_create_pseudo_p ()
1050 ? gen_reg_rtx_and_attrs (operands[0])
1052 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1053 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1056 [(set_attr "type" "vecload,load")
1057 (set_attr "length" "8,8")
1058 (set_attr "isa" "<VSisa>,*")])
1060 (define_insn "*vsx_le_perm_store_<mode>"
1061 [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
1062 (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
1063 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1064 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1068 [(set_attr "type" "vecstore,store")
1069 (set_attr "length" "12,8")
1070 (set_attr "isa" "<VSisa>,*")])
1073 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1074 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1075 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR
1076 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1079 rtx tmp = (can_create_pseudo_p ()
1080 ? gen_reg_rtx_and_attrs (operands[0])
1082 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1083 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1087 ;; Peepholes to catch loads and stores for TImode if TImode landed in
1088 ;; GPR registers on a little endian system.
1090 [(set (match_operand:VEC_TI 0 "int_reg_operand")
1091 (rotate:VEC_TI (match_operand:VEC_TI 1 "memory_operand")
1093 (set (match_operand:VEC_TI 2 "int_reg_operand")
1094 (rotate:VEC_TI (match_dup 0)
1096 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1097 && (rtx_equal_p (operands[0], operands[2])
1098 || peep2_reg_dead_p (2, operands[0]))"
1099 [(set (match_dup 2) (match_dup 1))])
1102 [(set (match_operand:VEC_TI 0 "int_reg_operand")
1103 (rotate:VEC_TI (match_operand:VEC_TI 1 "int_reg_operand")
1105 (set (match_operand:VEC_TI 2 "memory_operand")
1106 (rotate:VEC_TI (match_dup 0)
1108 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1109 && peep2_reg_dead_p (2, operands[0])"
1110 [(set (match_dup 2) (match_dup 1))])
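;; Hedged RTL sketch of what the two peepholes above remove (the 64-bit rotate
;; amount is how the GPR half-swap is modelled): a loaded-then-swapped value
;;   (set (reg:TI r) (rotate:TI (mem:TI addr) (const_int 64)))
;;   (set (reg:TI s) (rotate:TI (reg:TI r)    (const_int 64)))
;; degenerates to the single copy
;;   (set (reg:TI s) (mem:TI addr))
;; provided r is dead afterwards (or r and s are the same register); the
;; second peephole handles the symmetric register-to-memory direction.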
1112 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
1113 ;; VSX registers on a little endian system. The vector types and IEEE 128-bit
1114 ;; floating point are handled by the more generic swap elimination pass.
1116 [(set (match_operand:TI 0 "vsx_register_operand")
1117 (rotate:TI (match_operand:TI 1 "vsx_register_operand")
1119 (set (match_operand:TI 2 "vsx_register_operand")
1120 (rotate:TI (match_dup 0)
1122 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1123 && (rtx_equal_p (operands[0], operands[2])
1124 || peep2_reg_dead_p (2, operands[0]))"
1125 [(set (match_dup 2) (match_dup 1))])
1127 ;; The post-reload split requires that we re-permute the source
1128 ;; register in case it is still live.
1130 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1131 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1132 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR
1133 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1136 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1137 rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
1138 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1142 ;; Vector constants that can be generated with XXSPLTIB, which was added in ISA
1143 ;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
1144 (define_insn "xxspltib_v16qi"
1145 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1146 (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
1149 operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
1150 return "xxspltib %x0,%2";
1152 [(set_attr "type" "vecperm")])
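;; Hedged example (assumed typical code generation, not from a test case):
;;   vector signed char all_fives (void)
;;   {
;;     return vec_splats ((signed char) 5);
;;   }
;; can be emitted as the single instruction
;;   xxspltib 34,5
;; on ISA 3.0 instead of loading the 16-byte constant from memory.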
1154 (define_insn "xxspltib_<mode>_nosplit"
1155 [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
1156 (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
1159 rtx op1 = operands[1];
1163 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1167 operands[2] = GEN_INT (value & 0xff);
1168 return "xxspltib %x0,%2";
1170 [(set_attr "type" "vecperm")])
1172 (define_insn_and_split "*xxspltib_<mode>_split"
1173 [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
1174 (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
1182 rtx op0 = operands[0];
1183 rtx op1 = operands[1];
1184 rtx tmp = ((can_create_pseudo_p ())
1185 ? gen_reg_rtx (V16QImode)
1186 : gen_lowpart (V16QImode, op0));
1188 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1192 emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
1194 if (<MODE>mode == V2DImode)
1195 emit_insn (gen_vsx_sign_extend_v16qi_v2di (op0, tmp));
1197 else if (<MODE>mode == V4SImode)
1198 emit_insn (gen_vsx_sign_extend_v16qi_v4si (op0, tmp));
1200 else if (<MODE>mode == V8HImode)
1201 emit_insn (gen_altivec_vupkhsb (op0, tmp));
1208 [(set_attr "type" "vecperm")
1209 (set_attr "length" "8")])
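;; Hedged illustration of the split above (register numbers are arbitrary,
;; and vs34 is the same register as v2): a V4SI constant such as
;; { 100, 100, 100, 100 }, which is too big for vspltisw, can be built as
;;   xxspltib 34,100
;;   vextsb2w 2,2
;; i.e. splat the byte and then sign-extend each byte lane to a word, rather
;; than loading the constant from the constant pool.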
1211 (define_insn_and_split "*vspltisw_v2di_split"
1212 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
1213 (match_operand:V2DI 1 "vspltisw_vupkhsw_constant_split" "W"))]
1214 "TARGET_P8_VECTOR && vspltisw_vupkhsw_constant_split (operands[1], V2DImode)"
1219 rtx op0 = operands[0];
1220 rtx op1 = operands[1];
1221 rtx tmp = can_create_pseudo_p ()
1222 ? gen_reg_rtx (V4SImode)
1223 : gen_lowpart (V4SImode, op0);
1226 vspltisw_vupkhsw_constant_p (op1, V2DImode, &value);
1227 emit_insn (gen_altivec_vspltisw (tmp, GEN_INT (value)));
1228 emit_insn (gen_altivec_vupkhsw_direct (op0, tmp));
1232 [(set_attr "type" "vecperm")
1233 (set_attr "length" "8")])
1236 ;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTIB
1237 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
1238 ;; all 1's, since the machine does not have to wait for the previous
1239 ;; instruction using the register being set (such as a store waiting on a slow
1240 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
1242 ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
1243 ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
1245 ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
1246 (define_insn "vsx_mov<mode>_64bit"
1247 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1248 "=ZwO, wa, wa, r, we, ?wQ,
1249 ?&r, ??r, ??Y, <??r>, wa, v,
1251 ?wa, v, <??r>, wZ, v")
1253 (match_operand:VSX_M 1 "input_operand"
1254 "wa, ZwO, wa, we, r, r,
1255 wQ, Y, r, r, wE, jwM,
1257 ?jwM, W, <nW>, v, wZ"))]
1259 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1260 && (register_operand (operands[0], <MODE>mode)
1261 || register_operand (operands[1], <MODE>mode))"
1263 return rs6000_output_move_128bit (operands);
1266 "vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
1267 store, load, store, *, vecsimple, vecsimple,
1269 vecsimple, *, *, vecstore, vecload")
1270 (set_attr "num_insns"
1275 (set_attr "max_prefixed_insns"
1286 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1289 <VSisa>, *, *, *, *")
1290 (set_attr "prefixed"
1296 ;; VSX store VSX load VSX move GPR load GPR store GPR move
1298 ;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const
1299 ;; LVX (VMX) STVX (VMX)
1300 (define_insn "*vsx_mov<mode>_32bit"
1301 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1302 "=ZwO, wa, wa, ??r, ??Y, <??r>,
1304 wa, v, ?wa, v, <??r>,
1307 (match_operand:VSX_M 1 "input_operand"
1308 "wa, ZwO, wa, Y, r, r,
1310 wE, jwM, ?jwM, W, <nW>,
1313 "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1314 && (register_operand (operands[0], <MODE>mode)
1315 || register_operand (operands[1], <MODE>mode))"
1317 return rs6000_output_move_128bit (operands);
1320 "vecstore, vecload, vecsimple, load, store, *,
1322 vecsimple, vecsimple, vecsimple, *, *,
1325 "*, *, *, 16, 16, 16,
1330 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1332 p9v, *, <VSisa>, *, *,
1334 (set_attr "prefixed"
1340 ;; Explicit load/store expanders for the builtin functions
1341 (define_expand "vsx_load_<mode>"
1342 [(set (match_operand:VSX_M 0 "vsx_register_operand")
1343 (match_operand:VSX_M 1 "memory_operand"))]
1344 "VECTOR_MEM_VSX_P (<MODE>mode)"
1346 /* Expand to swaps if needed, prior to swap optimization. */
1347 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
1348 && !altivec_indexed_or_indirect_operand(operands[1], <MODE>mode))
1350 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1355 (define_expand "vsx_store_<mode>"
1356 [(set (match_operand:VSX_M 0 "memory_operand")
1357 (match_operand:VSX_M 1 "vsx_register_operand"))]
1358 "VECTOR_MEM_VSX_P (<MODE>mode)"
1360 /* Expand to swaps if needed, prior to swap optimization. */
1361 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
1362 && !altivec_indexed_or_indirect_operand(operands[0], <MODE>mode))
1364 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1369 ;; Load rightmost element from load_data
1370 ;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
1371 (define_insn "vsx_lxvr<wd>x"
1372 [(set (match_operand:TI 0 "vsx_register_operand" "=wa")
1373 (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "Z")))]
1376 [(set_attr "type" "vecload")])
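;; Hedged example (ISA 3.1, with -mcpu=power10; the exact overload shown is
;; an assumption): something like
;;   vector unsigned __int128 f (signed long long off, const unsigned int *p)
;;   {
;;     return vec_xl_zext (off, p);
;;   }
;; loads a single word with lxvrwx and zero-extends it into the rightmost
;; element of the 128-bit result, which is what the TImode zero_extend above
;; describes.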
1378 ;; Store rightmost element into store_data
1379 ;; using stxvrbx, stxvrhx, stxvrwx, stxvrdx.
1380 (define_insn "vsx_stxvr<wd>x"
1381 [(set (match_operand:INT_ISA3 0 "memory_operand" "=Z")
1382 (truncate:INT_ISA3 (match_operand:TI 1 "vsx_register_operand" "wa")))]
1384 "stxvr<wd>x %x1,%y0"
1385 [(set_attr "type" "vecstore")])
1387 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1388 ;; when you really want their element-reversing behavior.
1389 (define_insn "vsx_ld_elemrev_v2di"
1390 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1392 (match_operand:V2DI 1 "memory_operand" "Z")
1393 (parallel [(const_int 1) (const_int 0)])))]
1394 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1396 [(set_attr "type" "vecload")])
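;; Hedged note (register numbers are arbitrary): this pattern emits a bare
;;   lxvd2x 34,0,3
;; and describes its little-endian effect honestly as a doubleword-reversed
;; load: register element 1 receives memory element 0 and vice versa, with no
;; corrective xxpermdi appended, which is exactly the behavior the elemrev
;; built-in functions ask for.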
1398 (define_insn "vsx_ld_elemrev_v1ti"
1399 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
1401 (match_operand:V1TI 1 "memory_operand" "Z")
1402 (parallel [(const_int 0)])))]
1403 "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
1405 return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
1407 [(set_attr "type" "vecload")])
1409 (define_insn "vsx_ld_elemrev_v2df"
1410 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1412 (match_operand:V2DF 1 "memory_operand" "Z")
1413 (parallel [(const_int 1) (const_int 0)])))]
1414 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1416 [(set_attr "type" "vecload")])
1418 (define_insn "vsx_ld_elemrev_v4si"
1419 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1421 (match_operand:V4SI 1 "memory_operand" "Z")
1422 (parallel [(const_int 3) (const_int 2)
1423 (const_int 1) (const_int 0)])))]
1424 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1426 [(set_attr "type" "vecload")])
1428 (define_insn "vsx_ld_elemrev_v4sf"
1429 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1431 (match_operand:V4SF 1 "memory_operand" "Z")
1432 (parallel [(const_int 3) (const_int 2)
1433 (const_int 1) (const_int 0)])))]
1434 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1436 [(set_attr "type" "vecload")])
1438 (define_expand "vsx_ld_elemrev_v8hi"
1439 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1441 (match_operand:V8HI 1 "memory_operand" "Z")
1442 (parallel [(const_int 7) (const_int 6)
1443 (const_int 5) (const_int 4)
1444 (const_int 3) (const_int 2)
1445 (const_int 1) (const_int 0)])))]
1446 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1448 if (!TARGET_P9_VECTOR)
1450 rtx tmp = gen_reg_rtx (V4SImode);
1451 rtx subreg, subreg2, perm[16], pcv;
1452 /* 2 is leftmost element in register */
1453 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1456 subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1457 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1458 subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1460 for (i = 0; i < 16; ++i)
1461 perm[i] = GEN_INT (reorder[i]);
1463 pcv = force_reg (V16QImode,
1464 gen_rtx_CONST_VECTOR (V16QImode,
1465 gen_rtvec_v (16, perm)));
1466 emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
1472 (define_insn "*vsx_ld_elemrev_v8hi_internal"
1473 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1475 (match_operand:V8HI 1 "memory_operand" "Z")
1476 (parallel [(const_int 7) (const_int 6)
1477 (const_int 5) (const_int 4)
1478 (const_int 3) (const_int 2)
1479 (const_int 1) (const_int 0)])))]
1480 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1482 [(set_attr "type" "vecload")])
1484 (define_expand "vsx_ld_elemrev_v16qi"
1485 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1487 (match_operand:V16QI 1 "memory_operand" "Z")
1488 (parallel [(const_int 15) (const_int 14)
1489 (const_int 13) (const_int 12)
1490 (const_int 11) (const_int 10)
1491 (const_int 9) (const_int 8)
1492 (const_int 7) (const_int 6)
1493 (const_int 5) (const_int 4)
1494 (const_int 3) (const_int 2)
1495 (const_int 1) (const_int 0)])))]
1496 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1498 if (!TARGET_P9_VECTOR)
1500 rtx tmp = gen_reg_rtx (V4SImode);
1501 rtx subreg, subreg2, perm[16], pcv;
1502 /* 3 is leftmost element in register */
1503 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1506 subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1507 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1508 subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1510 for (i = 0; i < 16; ++i)
1511 perm[i] = GEN_INT (reorder[i]);
1513 pcv = force_reg (V16QImode,
1514 gen_rtx_CONST_VECTOR (V16QImode,
1515 gen_rtvec_v (16, perm)));
1516 emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1522 (define_insn "vsx_ld_elemrev_v16qi_internal"
1523 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1525 (match_operand:V16QI 1 "memory_operand" "Z")
1526 (parallel [(const_int 15) (const_int 14)
1527 (const_int 13) (const_int 12)
1528 (const_int 11) (const_int 10)
1529 (const_int 9) (const_int 8)
1530 (const_int 7) (const_int 6)
1531 (const_int 5) (const_int 4)
1532 (const_int 3) (const_int 2)
1533 (const_int 1) (const_int 0)])))]
1534 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1536 [(set_attr "type" "vecload")])
1538 (define_insn "vsx_st_elemrev_v1ti"
1539 [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1541 (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1542 (parallel [(const_int 0)])))
1543 (clobber (match_dup 1))]
1544 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1546 return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1548 [(set_attr "type" "vecstore")])
1550 (define_insn "vsx_st_elemrev_v2df"
1551 [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1553 (match_operand:V2DF 1 "vsx_register_operand" "wa")
1554 (parallel [(const_int 1) (const_int 0)])))]
1555 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1557 [(set_attr "type" "vecstore")])
1559 (define_insn "vsx_st_elemrev_v2di"
1560 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1562 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1563 (parallel [(const_int 1) (const_int 0)])))]
1564 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1566 [(set_attr "type" "vecstore")])
1568 (define_insn "vsx_st_elemrev_v4sf"
1569 [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1571 (match_operand:V4SF 1 "vsx_register_operand" "wa")
1572 (parallel [(const_int 3) (const_int 2)
1573 (const_int 1) (const_int 0)])))]
1574 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1576 [(set_attr "type" "vecstore")])
1578 (define_insn "vsx_st_elemrev_v4si"
1579 [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1581 (match_operand:V4SI 1 "vsx_register_operand" "wa")
1582 (parallel [(const_int 3) (const_int 2)
1583 (const_int 1) (const_int 0)])))]
1584 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1586 [(set_attr "type" "vecstore")])
1588 (define_expand "vsx_st_elemrev_v8hi"
1589 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1591 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1592 (parallel [(const_int 7) (const_int 6)
1593 (const_int 5) (const_int 4)
1594 (const_int 3) (const_int 2)
1595 (const_int 1) (const_int 0)])))]
1596 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1598 if (!TARGET_P9_VECTOR)
1600 rtx mem_subreg, subreg, perm[16], pcv;
1601 rtx tmp = gen_reg_rtx (V8HImode);
1602 /* 2 is leftmost element in register */
1603 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1606 for (i = 0; i < 16; ++i)
1607 perm[i] = GEN_INT (reorder[i]);
1609 pcv = force_reg (V16QImode,
1610 gen_rtx_CONST_VECTOR (V16QImode,
1611 gen_rtvec_v (16, perm)));
1612 emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1614 subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1615 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1616 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1621 (define_insn "*vsx_st_elemrev_v2di_internal"
1622 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1624 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1625 (parallel [(const_int 1) (const_int 0)])))]
1626 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1628 [(set_attr "type" "vecstore")])
1630 (define_insn "*vsx_st_elemrev_v8hi_internal"
1631 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1633 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1634 (parallel [(const_int 7) (const_int 6)
1635 (const_int 5) (const_int 4)
1636 (const_int 3) (const_int 2)
1637 (const_int 1) (const_int 0)])))]
1638 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1640 [(set_attr "type" "vecstore")])
1642 (define_expand "vsx_st_elemrev_v16qi"
1643 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1645 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1646 (parallel [(const_int 15) (const_int 14)
1647 (const_int 13) (const_int 12)
1648 (const_int 11) (const_int 10)
1649 (const_int 9) (const_int 8)
1650 (const_int 7) (const_int 6)
1651 (const_int 5) (const_int 4)
1652 (const_int 3) (const_int 2)
1653 (const_int 1) (const_int 0)])))]
1654 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1656 if (!TARGET_P9_VECTOR)
1658 rtx mem_subreg, subreg, perm[16], pcv;
1659 rtx tmp = gen_reg_rtx (V16QImode);
1660 /* 3 is leftmost element in register */
1661 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1664 for (i = 0; i < 16; ++i)
1665 perm[i] = GEN_INT (reorder[i]);
1667 pcv = force_reg (V16QImode,
1668 gen_rtx_CONST_VECTOR (V16QImode,
1669 gen_rtvec_v (16, perm)));
1670 emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1672 subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1673 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1674 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1679 (define_insn "*vsx_st_elemrev_v16qi_internal"
1680 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1682 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1683 (parallel [(const_int 15) (const_int 14)
1684 (const_int 13) (const_int 12)
1685 (const_int 11) (const_int 10)
1686 (const_int 9) (const_int 8)
1687 (const_int 7) (const_int 6)
1688 (const_int 5) (const_int 4)
1689 (const_int 3) (const_int 2)
1690 (const_int 1) (const_int 0)])))]
1691 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1693 [(set_attr "type" "vecstore")])
1696 ;; VSX vector floating point arithmetic instructions. The VSX scalar
1697 ;; instructions are now combined with the insn for the traditional floating point unit.
1699 (define_insn "*vsx_add<mode>3"
1700 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1701 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1702 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1703 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1704 "xvadd<sd>p %x0,%x1,%x2"
1705 [(set_attr "type" "<VStype_simple>")])
1707 (define_insn "*vsx_sub<mode>3"
1708 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1709 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1710 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1711 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1712 "xvsub<sd>p %x0,%x1,%x2"
1713 [(set_attr "type" "<VStype_simple>")])
1715 (define_insn "*vsx_mul<mode>3"
1716 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1717 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1718 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1719 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1720 "xvmul<sd>p %x0,%x1,%x2"
1721 [(set_attr "type" "<VStype_simple>")])
1723 ; Emulate vector with scalar for vec_mul in V2DImode
1724 (define_insn_and_split "vsx_mul_v2di"
1725 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1726 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1727 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1729 "VECTOR_MEM_VSX_P (V2DImode)"
1731 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1734 rtx op0 = operands[0];
1735 rtx op1 = operands[1];
1736 rtx op2 = operands[2];
1739 emit_insn (gen_mulv2di3 (op0, op1, op2) );
1743 rtx op3 = gen_reg_rtx (DImode);
1744 rtx op4 = gen_reg_rtx (DImode);
1745 rtx op5 = gen_reg_rtx (DImode);
1746 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1747 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1748 if (TARGET_POWERPC64)
1749 emit_insn (gen_muldi3 (op5, op3, op4));
1752 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1753 emit_move_insn (op5, ret);
1755 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1756 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1757 if (TARGET_POWERPC64)
1758 emit_insn (gen_muldi3 (op3, op3, op4));
1761 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1762 emit_move_insn (op3, ret);
1764 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1768 [(set_attr "type" "mul")])
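;; Hedged example of what the split above handles (pre-ISA-3.1, 64-bit):
;;   vector long long mul2 (vector long long a, vector long long b)
;;   {
;;     return a * b;
;;   }
;; has no single vector instruction here, so each doubleword is extracted,
;; multiplied with mulld (or via expand_mult when not 64-bit), and the two
;; products are glued back together with vsx_concat_v2di; where a direct V2DI
;; multiply is available, the fast path at the top of the split uses it
;; instead.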
1770 (define_insn "*vsx_div<mode>3"
1771 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1772 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1773 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1774 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1775 "xvdiv<sd>p %x0,%x1,%x2"
1776 [(set_attr "type" "<VStype_div>")])
1778 ; Emulate vector with scalar for vec_div in V2DImode
1779 (define_insn_and_split "vsx_div_v2di"
1780 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1781 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1782 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1784 "VECTOR_MEM_VSX_P (V2DImode)"
1786 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1789 rtx op0 = operands[0];
1790 rtx op1 = operands[1];
1791 rtx op2 = operands[2];
1792 rtx op3 = gen_reg_rtx (DImode);
1793 rtx op4 = gen_reg_rtx (DImode);
1794 rtx op5 = gen_reg_rtx (DImode);
1795 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1796 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1797 if (TARGET_POWERPC64)
1798 emit_insn (gen_divdi3 (op5, op3, op4));
1801 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1802 rtx target = emit_library_call_value (libfunc,
1803 op5, LCT_NORMAL, DImode,
1806 emit_move_insn (op5, target);
1808 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1809 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1810 if (TARGET_POWERPC64)
1811 emit_insn (gen_divdi3 (op3, op3, op4));
1814 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1815 rtx target = emit_library_call_value (libfunc,
1816 op3, LCT_NORMAL, DImode,
1819 emit_move_insn (op3, target);
1821 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1824 [(set_attr "type" "div")])
1826 (define_insn_and_split "vsx_udiv_v2di"
1827 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1828 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1829 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1831 "VECTOR_MEM_VSX_P (V2DImode)"
1833 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1836 rtx op0 = operands[0];
1837 rtx op1 = operands[1];
1838 rtx op2 = operands[2];
1841 emit_insn (gen_udivv2di3 (op0, op1, op2) );
1844 rtx op3 = gen_reg_rtx (DImode);
1845 rtx op4 = gen_reg_rtx (DImode);
1846 rtx op5 = gen_reg_rtx (DImode);
1848 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1849 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1851 if (TARGET_POWERPC64)
1852 emit_insn (gen_udivdi3 (op5, op3, op4));
1855 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1856 rtx target = emit_library_call_value (libfunc,
1857 op5, LCT_NORMAL, DImode,
1860 emit_move_insn (op5, target);
1862 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1863 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1865 if (TARGET_POWERPC64)
1866 emit_insn (gen_udivdi3 (op3, op3, op4));
1869 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1870 rtx target = emit_library_call_value (libfunc,
1871 op3, LCT_NORMAL, DImode,
1874 emit_move_insn (op3, target);
1876 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1880 [(set_attr "type" "div")])
1882 ;; Vector integer signed/unsigned divide
1883 (define_insn "vsx_div_v1ti"
1884 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1885 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1886 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1890 [(set_attr "type" "div")])
1892 (define_insn "vsx_udiv_v1ti"
1893 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1894 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1895 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1899 [(set_attr "type" "div")])
1901 (define_insn "vsx_dives_v1ti"
1902 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1903 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1904 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1905 UNSPEC_VSX_DIVESQ))]
1908 [(set_attr "type" "div")])
1910 (define_insn "vsx_diveu_v1ti"
1911 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1912 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1913 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1914 UNSPEC_VSX_DIVEUQ))]
1917 [(set_attr "type" "div")])
1919 (define_insn "vsx_mods_v1ti"
1920 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1921 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1922 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1926 [(set_attr "type" "div")])
1928 (define_insn "vsx_modu_v1ti"
1929 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1930 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1931 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1935 [(set_attr "type" "div")])
1937 ;; *tdiv* instruction returning the FG flag
1938 (define_expand "vsx_tdiv<mode>3_fg"
1940 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1941 (match_operand:VSX_B 2 "vsx_register_operand")]
1943 (set (match_operand:SI 0 "gpc_reg_operand")
1944 (gt:SI (match_dup 3)
1946 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1948 operands[3] = gen_reg_rtx (CCFPmode);
1951 ;; *tdiv* instruction returning the FE flag
1952 (define_expand "vsx_tdiv<mode>3_fe"
1954 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1955 (match_operand:VSX_B 2 "vsx_register_operand")]
1957 (set (match_operand:SI 0 "gpc_reg_operand")
1958 (eq:SI (match_dup 3)
1960 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1962 operands[3] = gen_reg_rtx (CCFPmode);
1965 (define_insn "*vsx_tdiv<mode>3_internal"
1966 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1967 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
1968 (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
1970 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1971 "x<VSv>tdiv<sd>p %0,%x1,%x2"
1972 [(set_attr "type" "<VStype_simple>")])
1974 (define_insn "vsx_fre<mode>2"
1975 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1976 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1978 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1980 [(set_attr "type" "<VStype_simple>")])
1982 (define_insn "*vsx_neg<mode>2"
1983 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1984 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1985 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1986 "xvneg<sd>p %x0,%x1"
1987 [(set_attr "type" "<VStype_simple>")])
1989 (define_insn "*vsx_abs<mode>2"
1990 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1991 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1992 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1993 "xvabs<sd>p %x0,%x1"
1994 [(set_attr "type" "<VStype_simple>")])
1996 (define_insn "vsx_nabs<mode>2"
1997 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2000 (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
2001 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2002 "xvnabs<sd>p %x0,%x1"
2003 [(set_attr "type" "<VStype_simple>")])
2005 (define_insn "vsx_smax<mode>3"
2006 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2007 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2008 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2009 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2010 "xvmax<sd>p %x0,%x1,%x2"
2011 [(set_attr "type" "<VStype_simple>")])
2013 (define_insn "*vsx_smin<mode>3"
2014 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2015 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2016 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2017 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2018 "xvmin<sd>p %x0,%x1,%x2"
2019 [(set_attr "type" "<VStype_simple>")])
2021 (define_insn "*vsx_sqrt<mode>2"
2022 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2023 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2024 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2025 "xvsqrt<sd>p %x0,%x1"
2026 [(set_attr "type" "<sd>sqrt")])
2028 (define_insn "*vsx_rsqrte<mode>2"
2029 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2030 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2032 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2033 "xvrsqrte<sd>p %x0,%x1"
2034 [(set_attr "type" "<VStype_simple>")])
2036 ;; *tsqrt* returning the fg flag
2037 (define_expand "vsx_tsqrt<mode>2_fg"
2039 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
2041 (set (match_operand:SI 0 "gpc_reg_operand")
2042 (gt:SI (match_dup 2)
2044 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2046 operands[2] = gen_reg_rtx (CCFPmode);
2049 ;; *tsqrt* returning the fe flag
2050 (define_expand "vsx_tsqrt<mode>2_fe"
2052 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
2054 (set (match_operand:SI 0 "gpc_reg_operand")
2055 (eq:SI (match_dup 2)
2057 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2059 operands[2] = gen_reg_rtx (CCFPmode);
2062 (define_insn "*vsx_tsqrt<mode>2_internal"
2063 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
2064 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2066 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2067 "x<VSv>tsqrt<sd>p %0,%x1"
2068 [(set_attr "type" "<VStype_simple>")])
2070 ;; Fused vector multiply/add instructions. Do not generate the Altivec versions
2071 ;; of fma (vmaddfp and vnmsubfp). These instructions allow the target to be a
2072 ;; separate register from the 3 inputs, which can possibly save an extra move
2073 ;; being generated (assuming all registers are AltiVec registers). However,
2074 ;; vmaddfp and vnmsubfp can have different behaviors than the VSX instructions
2075 ;; in some corner cases due to VSCR[NJ] being set or if the addend is +0.0
2077 (define_insn "*vsx_fmav4sf4"
2078 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
2079 (fma:V4SF
2080 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
2081 (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
2082 (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))]
2083 "VECTOR_UNIT_VSX_P (V4SFmode)"
2084 "@
2085 xvmaddasp %x0,%x1,%x2
2086 xvmaddmsp %x0,%x1,%x3"
2087 [(set_attr "type" "vecfloat")])
2089 (define_insn "*vsx_fmav2df4"
2090 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
2091 (fma:V2DF
2092 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
2093 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
2094 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
2095 "VECTOR_UNIT_VSX_P (V2DFmode)"
2096 "@
2097 xvmaddadp %x0,%x1,%x2
2098 xvmaddmdp %x0,%x1,%x3"
2099 [(set_attr "type" "vecdouble")])
2101 (define_insn "*vsx_fms<mode>4"
2102 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
2103 (fma:VSX_F
2104 (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
2105 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
2106 (neg:VSX_F
2107 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
2108 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2109 "@
2110 xvmsuba<sd>p %x0,%x1,%x2
2111 xvmsubm<sd>p %x0,%x1,%x3"
2112 [(set_attr "type" "<VStype_mul>")])
2114 (define_insn "*vsx_nfma<mode>4"
2115 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
2116 (neg:VSX_F
2117 (fma:VSX_F
2118 (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
2119 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
2120 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
2121 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2122 "@
2123 xvnmadda<sd>p %x0,%x1,%x2
2124 xvnmaddm<sd>p %x0,%x1,%x3"
2125 [(set_attr "type" "<VStype_mul>")])
2127 (define_insn "*vsx_nfmsv4sf4"
2128 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
2129 (neg:V4SF
2130 (fma:V4SF
2131 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
2132 (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
2133 (neg:V4SF
2134 (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))))]
2135 "VECTOR_UNIT_VSX_P (V4SFmode)"
2136 "@
2137 xvnmsubasp %x0,%x1,%x2
2138 xvnmsubmsp %x0,%x1,%x3"
2139 [(set_attr "type" "vecfloat")])
2141 (define_insn "*vsx_nfmsv2df4"
2142 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
2143 (neg:V2DF
2144 (fma:V2DF
2145 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
2146 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
2147 (neg:V2DF
2148 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
2149 "VECTOR_UNIT_VSX_P (V2DFmode)"
2150 "@
2151 xvnmsubadp %x0,%x1,%x2
2152 xvnmsubmdp %x0,%x1,%x3"
2153 [(set_attr "type" "vecdouble")])
2155 ;; Vector conditional expressions (no scalar version for these instructions)
2156 (define_insn "vsx_eq<mode>"
2157 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2158 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2159 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2160 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2161 "xvcmpeq<sd>p %x0,%x1,%x2"
2162 [(set_attr "type" "<VStype_simple>")])
2164 (define_insn "vsx_gt<mode>"
2165 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2166 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2167 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2168 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2169 "xvcmpgt<sd>p %x0,%x1,%x2"
2170 [(set_attr "type" "<VStype_simple>")])
2172 (define_insn "*vsx_ge<mode>"
2173 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2174 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2175 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2176 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2177 "xvcmpge<sd>p %x0,%x1,%x2"
2178 [(set_attr "type" "<VStype_simple>")])
2180 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
2181 ;; indicate a combined status
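;; For illustration, a hedged C sketch of a predicate compare that is
;; expected to use the "dot" forms below (vec_all_eq is the AltiVec/VSX
;; intrinsic; the function name is just an example):
;;
;;   #include <altivec.h>
;;
;;   int
;;   all_equal (vector double a, vector double b)
;;   {
;;     return vec_all_eq (a, b);   /* xvcmpeqdp. plus a CR6 test  */
;;   }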
2182 (define_insn "*vsx_eq_<mode>_p"
2183 [(set (reg:CC CR6_REGNO)
2184 (unspec:CC
2185 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2186 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2187 UNSPEC_PREDICATE))
2188 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2189 (eq:VSX_F (match_dup 1)
2190 (match_dup 2)))]
2191 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2192 "xvcmpeq<sd>p. %x0,%x1,%x2"
2193 [(set_attr "type" "<VStype_simple>")])
2195 (define_insn "*vsx_gt_<mode>_p"
2196 [(set (reg:CC CR6_REGNO)
2197 (unspec:CC
2198 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2199 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2200 UNSPEC_PREDICATE))
2201 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2202 (gt:VSX_F (match_dup 1)
2203 (match_dup 2)))]
2204 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2205 "xvcmpgt<sd>p. %x0,%x1,%x2"
2206 [(set_attr "type" "<VStype_simple>")])
2209 ;; Set the CR field BF to indicate if the lowest bit (bit 7) of every byte
2210 ;; element in VSR[XB] is equal to 1 (ALL_TRUE) or equal to 0 (ALL_FALSE).
2211 (define_insn "*xvtlsbb_internal"
2212 [(set (match_operand:CC 0 "cc_reg_operand" "=y")
2213 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2214 UNSPEC_XVTLSBB))]
2215 "TARGET_POWER10"
2216 "xvtlsbb %0,%x1"
2217 [(set_attr "type" "logical")])
2219 ;; Vector Test Least Significant Bit by Byte
2220 ;; for the implementation of the builtin
2221 ;; __builtin_vec_test_lsbb_all_ones
2222 ;; int vec_test_lsbb_all_ones (vector unsigned char);
2223 ;; and
2224 ;; __builtin_vec_test_lsbb_all_zeros
2225 ;; int vec_test_lsbb_all_zeros (vector unsigned char);
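;; A minimal usage sketch (hypothetical function name, assuming a Power10
;; target and <altivec.h>; not part of this file):
;;
;;   #include <altivec.h>
;;
;;   int
;;   all_lsb_ones (vector unsigned char v)
;;   {
;;     return vec_test_lsbb_all_ones (v);   /* expands through xvtlsbbo  */
;;   }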
2226 (define_expand "xvtlsbbo"
2227 [(set (match_dup 2)
2228 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2229 UNSPEC_XVTLSBB))
2230 (set (match_operand:SI 0 "gpc_reg_operand" "=r")
2231 (lt:SI (match_dup 2) (const_int 0)))]
2232 "TARGET_POWER10"
2233 {
2234 operands[2] = gen_reg_rtx (CCmode);
2235 })
2236 (define_expand "xvtlsbbz"
2237 [(set (match_dup 2)
2238 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2239 UNSPEC_XVTLSBB))
2240 (set (match_operand:SI 0 "gpc_reg_operand" "=r")
2241 (eq:SI (match_dup 2) (const_int 0)))]
2242 "TARGET_POWER10"
2243 {
2244 operands[2] = gen_reg_rtx (CCmode);
2245 })
2247 (define_insn "*vsx_ge_<mode>_p"
2248 [(set (reg:CC CR6_REGNO)
2249 (unspec:CC
2250 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2251 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2252 UNSPEC_PREDICATE))
2253 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2254 (ge:VSX_F (match_dup 1)
2255 (match_dup 2)))]
2256 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2257 "xvcmpge<sd>p. %x0,%x1,%x2"
2258 [(set_attr "type" "<VStype_simple>")])
2261 (define_insn "vsx_copysign<mode>3"
2262 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2263 (copysign:VSX_F
2264 (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2265 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2266 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2267 "xvcpsgn<sd>p %x0,%x2,%x1"
2268 [(set_attr "type" "<VStype_simple>")])
2270 ;; For the conversions, limit the register class for the integer value to be
2271 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2272 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2273 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2274 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2275 ;; in allowing virtual registers.
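;; A hedged C sketch of the kind of vector int <-> float conversion these
;; patterns implement (typedefs and the function name are illustrative only):
;;
;;   typedef int   v4si __attribute__ ((vector_size (16)));
;;   typedef float v4sf __attribute__ ((vector_size (16)));
;;
;;   v4sf
;;   to_float (v4si x)
;;   {
;;     return __builtin_convertvector (x, v4sf);   /* expected: xvcvsxwsp  */
;;   }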
2276 (define_insn "vsx_float<VSi><mode>2"
2277 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2278 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2279 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2280 "xvcvsx<VSc><sd>p %x0,%x1"
2281 [(set_attr "type" "<VStype_simple>")])
2283 (define_insn "vsx_floatuns<VSi><mode>2"
2284 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2285 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2286 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2287 "xvcvux<VSc><sd>p %x0,%x1"
2288 [(set_attr "type" "<VStype_simple>")])
2290 (define_insn "vsx_fix_trunc<mode><VSi>2"
2291 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2292 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2293 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2294 "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
2295 [(set_attr "type" "<VStype_simple>")])
2297 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2298 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2299 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2300 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2301 "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
2302 [(set_attr "type" "<VStype_simple>")])
2304 ;; Math rounding functions
2305 (define_insn "vsx_x<VSv>r<sd>pi"
2306 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2307 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2308 UNSPEC_VSX_ROUND_I))]
2309 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2310 "x<VSv>r<sd>pi %x0,%x1"
2311 [(set_attr "type" "<VStype_simple>")])
2313 (define_insn "vsx_x<VSv>r<sd>pic"
2314 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2315 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2316 UNSPEC_VSX_ROUND_IC))]
2317 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2318 "x<VSv>r<sd>pic %x0,%x1"
2319 [(set_attr "type" "<VStype_simple>")])
2321 (define_insn "vsx_btrunc<mode>2"
2322 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2323 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2324 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2325 "xvr<sd>piz %x0,%x1"
2326 [(set_attr "type" "<VStype_simple>")])
2328 (define_insn "*vsx_b2trunc<mode>2"
2329 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2330 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2331 UNSPEC_FRIZ))]
2332 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2333 "x<VSv>r<sd>piz %x0,%x1"
2334 [(set_attr "type" "<VStype_simple>")])
2336 (define_insn "vsx_floor<mode>2"
2337 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2338 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2339 UNSPEC_FRIM))]
2340 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2341 "xvr<sd>pim %x0,%x1"
2342 [(set_attr "type" "<VStype_simple>")])
2344 (define_insn "vsx_ceil<mode>2"
2345 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2346 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2347 UNSPEC_FRIP))]
2348 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2349 "xvr<sd>pip %x0,%x1"
2350 [(set_attr "type" "<VStype_simple>")])
2353 ;; VSX convert to/from double vector
2355 ;; Convert between single and double precision
2356 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2357 ;; scalar single precision instructions internally use the double format.
2358 ;; Prefer the altivec registers, since we likely will need to do a vperm
2359 (define_insn "vsx_xscvdpsp"
2360 [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
2361 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
2362 UNSPEC_VSX_CVSPDP))]
2363 "VECTOR_UNIT_VSX_P (DFmode)"
2365 [(set_attr "type" "fp")])
2367 (define_insn "vsx_xvcvspdp_be"
2368 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2369 (float_extend:V2DF
2370 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2371 (parallel [(const_int 0) (const_int 2)]))))]
2372 "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
2373 "xvcvspdp %x0,%x1"
2374 [(set_attr "type" "vecdouble")])
2376 (define_insn "vsx_xvcvspdp_le"
2377 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2378 (float_extend:V2DF
2379 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2380 (parallel [(const_int 1) (const_int 3)]))))]
2381 "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
2382 "xvcvspdp %x0,%x1"
2383 [(set_attr "type" "vecdouble")])
2385 (define_expand "vsx_xvcvspdp"
2386 [(match_operand:V2DF 0 "vsx_register_operand")
2387 (match_operand:V4SF 1 "vsx_register_operand")]
2388 "VECTOR_UNIT_VSX_P (V4SFmode)"
2390 if (BYTES_BIG_ENDIAN)
2391 emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
2393 emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
2397 (define_insn "vsx_xvcvdpsp"
2398 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
2399 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
2400 UNSPEC_VSX_CVSPDP))]
2401 "VECTOR_UNIT_VSX_P (V2DFmode)"
2403 [(set_attr "type" "vecdouble")])
2405 ;; xscvspdp, represent the scalar SF type as V4SF
2406 (define_insn "vsx_xscvspdp"
2407 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2408 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2409 UNSPEC_VSX_CVSPDP))]
2410 "VECTOR_UNIT_VSX_P (V4SFmode)"
2412 [(set_attr "type" "fp")])
2414 ;; Same as vsx_xscvspdp, but use SF as the type
2415 (define_insn "vsx_xscvspdp_scalar2"
2416 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2417 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2418 UNSPEC_VSX_CVSPDP))]
2419 "VECTOR_UNIT_VSX_P (V4SFmode)"
2421 [(set_attr "type" "fp")])
2423 ;; Generate xvcvhpsp instruction
2424 (define_insn "vsx_xvcvhpsp"
2425 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2426 (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")]
2427 UNSPEC_VSX_CVHPSP))]
2428 "TARGET_P9_VECTOR"
2429 "xvcvhpsp %x0,%x1"
2430 [(set_attr "type" "vecfloat")])
2432 ;; Generate xvcvsphp
2433 (define_insn "vsx_xvcvsphp"
2434 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2435 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2436 UNSPEC_VSX_XVCVSPHP))]
2437 "TARGET_P9_VECTOR"
2438 "xvcvsphp %x0,%x1"
2439 [(set_attr "type" "vecfloat")])
2441 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
2442 ;; format of scalars is actually DF.
2443 (define_insn "vsx_xscvdpsp_scalar"
2444 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2445 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2446 UNSPEC_VSX_CVSPDP))]
2447 "VECTOR_UNIT_VSX_P (V4SFmode)"
2449 [(set_attr "type" "fp")])
2451 ;; ISA 2.07 xscvdpspn/xscvspdpn, which do not raise an error on signalling NaNs
2452 (define_insn "vsx_xscvdpspn"
2453 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2454 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
2455 UNSPEC_VSX_CVDPSPN))]
2456 "TARGET_XSCVDPSPN"
2457 "xscvdpspn %x0,%x1"
2458 [(set_attr "type" "fp")])
2460 (define_insn "vsx_xscvspdpn"
2461 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2462 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2463 UNSPEC_VSX_CVSPDPN))]
2464 "TARGET_XSCVSPDPN"
2465 "xscvspdpn %x0,%x1"
2466 [(set_attr "type" "fp")])
2468 (define_insn "vsx_xscvdpspn_scalar"
2469 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2470 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2471 UNSPEC_VSX_CVDPSPN))]
2472 "TARGET_XSCVDPSPN"
2473 "xscvdpspn %x0,%x1"
2474 [(set_attr "type" "fp")])
2476 ;; Used by direct move to move a SFmode value from GPR to VSX register
2477 (define_insn "vsx_xscvspdpn_directmove"
2478 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2479 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2480 UNSPEC_VSX_CVSPDPN))]
2481 "TARGET_XSCVSPDPN"
2482 "xscvspdpn %x0,%x1"
2483 [(set_attr "type" "fp")])
2485 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2487 (define_insn "vsx_xvcv<su>xwsp"
2488 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2489 (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
2490 "VECTOR_UNIT_VSX_P (V4SFmode)"
2491 "xvcv<su>xwsp %x0,%x1"
2492 [(set_attr "type" "vecfloat")])
2494 (define_insn "vsx_xvcv<su>xddp"
2495 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2496 (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
2497 "VECTOR_UNIT_VSX_P (V2DFmode)"
2498 "xvcv<su>xddp %x0,%x1"
2499 [(set_attr "type" "vecdouble")])
2501 (define_insn "vsx_xvcvsp<su>xws"
2502 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2503 (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
2504 "VECTOR_UNIT_VSX_P (V4SFmode)"
2505 "xvcvsp<su>xws %x0,%x1"
2506 [(set_attr "type" "vecfloat")])
2508 (define_insn "vsx_xvcvdp<su>xds"
2509 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2510 (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
2511 "VECTOR_UNIT_VSX_P (V2DFmode)"
2512 "xvcvdp<su>xds %x0,%x1"
2513 [(set_attr "type" "vecdouble")])
2515 (define_expand "vsx_xvcvsxddp_scale"
2516 [(match_operand:V2DF 0 "vsx_register_operand")
2517 (match_operand:V2DI 1 "vsx_register_operand")
2518 (match_operand:QI 2 "immediate_operand")]
2519 "VECTOR_UNIT_VSX_P (V2DFmode)"
2521 rtx op0 = operands[0];
2522 rtx op1 = operands[1];
2523 int scale = INTVAL(operands[2]);
2524 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2526 rs6000_scale_v2df (op0, op0, -scale);
2530 (define_expand "vsx_xvcvuxddp_scale"
2531 [(match_operand:V2DF 0 "vsx_register_operand")
2532 (match_operand:V2DI 1 "vsx_register_operand")
2533 (match_operand:QI 2 "immediate_operand")]
2534 "VECTOR_UNIT_VSX_P (V2DFmode)"
2536 rtx op0 = operands[0];
2537 rtx op1 = operands[1];
2538 int scale = INTVAL(operands[2]);
2539 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2541 rs6000_scale_v2df (op0, op0, -scale);
2545 (define_expand "vsx_xvcvdpsxds_scale"
2546 [(match_operand:V2DI 0 "vsx_register_operand")
2547 (match_operand:V2DF 1 "vsx_register_operand")
2548 (match_operand:QI 2 "immediate_operand")]
2549 "VECTOR_UNIT_VSX_P (V2DFmode)"
2551 rtx op0 = operands[0];
2552 rtx op1 = operands[1];
2554 int scale = INTVAL (operands[2]);
2559 tmp = gen_reg_rtx (V2DFmode);
2560 rs6000_scale_v2df (tmp, op1, scale);
2562 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2566 ;; convert vector of 64-bit floating point numbers to vector of
2567 ;; 64-bit unsigned integer
2568 (define_expand "vsx_xvcvdpuxds_scale"
2569 [(match_operand:V2DI 0 "vsx_register_operand")
2570 (match_operand:V2DF 1 "vsx_register_operand")
2571 (match_operand:QI 2 "immediate_operand")]
2572 "VECTOR_UNIT_VSX_P (V2DFmode)"
2574 rtx op0 = operands[0];
2575 rtx op1 = operands[1];
2577 int scale = INTVAL (operands[2]);
2582 tmp = gen_reg_rtx (V2DFmode);
2583 rs6000_scale_v2df (tmp, op1, scale);
2585 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2589 ;; Convert from 64-bit to 32-bit types
2590 ;; Note, favor the Altivec registers since the usual use of these instructions
2591 ;; is in vector converts and we need to use the Altivec vperm instruction.
2593 (define_insn "vsx_xvcvdpsxws"
2594 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2595 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2596 UNSPEC_VSX_CVDPSXWS))]
2597 "VECTOR_UNIT_VSX_P (V2DFmode)"
2598 "xvcvdpsxws %x0,%x1"
2599 [(set_attr "type" "vecdouble")])
2601 (define_insn "vsx_xvcvdpuxws"
2602 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2603 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2604 UNSPEC_VSX_CVDPUXWS))]
2605 "VECTOR_UNIT_VSX_P (V2DFmode)"
2606 "xvcvdpuxws %x0,%x1"
2607 [(set_attr "type" "vecdouble")])
2609 (define_insn "vsx_xvcvsxdsp"
2610 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2611 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2612 UNSPEC_VSX_CVSXDSP))]
2613 "VECTOR_UNIT_VSX_P (V2DFmode)"
2615 [(set_attr "type" "vecfloat")])
2617 (define_insn "vsx_xvcvuxdsp"
2618 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2619 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2620 UNSPEC_VSX_CVUXDSP))]
2621 "VECTOR_UNIT_VSX_P (V2DFmode)"
2623 [(set_attr "type" "vecdouble")])
2625 ;; Convert vector of 32-bit signed/unsigned integers to vector of
2626 ;; 64-bit floating point numbers.
2627 (define_insn "vsx_xvcv<su>xwdp_be"
2628 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2629 (any_float:V2DF
2630 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2631 (parallel [(const_int 0) (const_int 2)]))))]
2632 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2633 "xvcv<su>xwdp %x0,%x1"
2634 [(set_attr "type" "vecdouble")])
2636 (define_insn "vsx_xvcv<su>xwdp_le"
2637 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2638 (any_float:V2DF
2639 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2640 (parallel [(const_int 1) (const_int 3)]))))]
2641 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2642 "xvcv<su>xwdp %x0,%x1"
2643 [(set_attr "type" "vecdouble")])
2645 (define_expand "vsx_xvcv<su>xwdp"
2646 [(match_operand:V2DF 0 "vsx_register_operand")
2647 (match_operand:V4SI 1 "vsx_register_operand")
2648 (any_float (pc))]
2649 "VECTOR_UNIT_VSX_P (V2DFmode)"
2650 {
2651 if (BYTES_BIG_ENDIAN)
2652 emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
2653 else
2654 emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
2655 DONE;
2656 })
2658 (define_insn "vsx_xvcvsxwdp_df"
2659 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2660 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2661 UNSPEC_VSX_CVSXWDP))]
2662 "TARGET_VSX"
2663 "xvcvsxwdp %x0,%x1"
2664 [(set_attr "type" "vecdouble")])
2666 (define_insn "vsx_xvcvuxwdp_df"
2667 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2668 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2669 UNSPEC_VSX_CVUXWDP))]
2670 "TARGET_VSX"
2671 "xvcvuxwdp %x0,%x1"
2672 [(set_attr "type" "vecdouble")])
2674 ;; Convert vector of 32-bit floating point numbers to vector of
2675 ;; 64-bit signed/unsigned integers.
2676 (define_insn "vsx_xvcvsp<su>xds_be"
2677 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2678 (any_fix:V2DI
2679 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2680 (parallel [(const_int 0) (const_int 2)]))))]
2681 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2682 "xvcvsp<su>xds %x0,%x1"
2683 [(set_attr "type" "vecdouble")])
2685 (define_insn "vsx_xvcvsp<su>xds_le"
2686 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2687 (any_fix:V2DI
2688 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2689 (parallel [(const_int 1) (const_int 3)]))))]
2690 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2691 "xvcvsp<su>xds %x0,%x1"
2692 [(set_attr "type" "vecdouble")])
2694 (define_expand "vsx_xvcvsp<su>xds"
2695 [(match_operand:V2DI 0 "vsx_register_operand")
2696 (match_operand:V4SF 1 "vsx_register_operand")
2697 (any_fix (pc))]
2698 "VECTOR_UNIT_VSX_P (V2DFmode)"
2699 {
2700 if (BYTES_BIG_ENDIAN)
2701 emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
2702 else
2703 emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
2704 DONE;
2705 })
2707 ;; Convert float vector even elements to signed long long vector
2708 (define_expand "vsignede_v4sf"
2709 [(match_operand:V2DI 0 "vsx_register_operand")
2710 (match_operand:V4SF 1 "vsx_register_operand")]
2711 "VECTOR_UNIT_VSX_P (V2DFmode)"
2713 if (BYTES_BIG_ENDIAN)
2714 emit_insn (gen_vsx_xvcvspsxds_be (operands[0], operands[1]));
2717 /* Shift left one word to put even word in correct location. */
2718 rtx rtx_tmp = gen_reg_rtx (V4SFmode);
2719 rtx rtx_val = GEN_INT (4);
2720 emit_insn (gen_altivec_vsldoi_v4sf (rtx_tmp, operands[1], operands[1],
2722 emit_insn (gen_vsx_xvcvspsxds_le (operands[0], rtx_tmp));
2728 ;; Convert float vector odd elements to signed long long vector
2729 (define_expand "vsignedo_v4sf"
2730 [(match_operand:V2DI 0 "vsx_register_operand")
2731 (match_operand:V4SF 1 "vsx_register_operand")]
2732 "VECTOR_UNIT_VSX_P (V2DFmode)"
2734 if (BYTES_BIG_ENDIAN)
2736 /* Shift left one word to put even word in correct location. */
2737 rtx rtx_tmp = gen_reg_rtx (V4SFmode);
2738 rtx rtx_val = GEN_INT (4);
2739 emit_insn (gen_altivec_vsldoi_v4sf (rtx_tmp, operands[1], operands[1],
2741 emit_insn (gen_vsx_xvcvspsxds_be (operands[0], rtx_tmp));
2744 emit_insn (gen_vsx_xvcvspsxds_le (operands[0], operands[1]));
2749 ;; Convert float vector of even vector elements to unsigned long long vector
2750 (define_expand "vunsignede_v4sf"
2751 [(match_operand:V2DI 0 "vsx_register_operand")
2752 (match_operand:V4SF 1 "vsx_register_operand")]
2753 "VECTOR_UNIT_VSX_P (V2DFmode)"
2755 if (BYTES_BIG_ENDIAN)
2756 emit_insn (gen_vsx_xvcvspuxds_be (operands[0], operands[1]));
2759 /* Shift left one word to put even word in correct location. */
2760 rtx rtx_tmp = gen_reg_rtx (V4SFmode);
2761 rtx rtx_val = GEN_INT (4);
2762 emit_insn (gen_altivec_vsldoi_v4sf (rtx_tmp, operands[1], operands[1],
2764 emit_insn (gen_vsx_xvcvspuxds_le (operands[0], rtx_tmp));
2770 ;; Convert float vector of odd elements to unsigned long long vector
2771 (define_expand "vunsignedo_v4sf"
2772 [(match_operand:V2DI 0 "vsx_register_operand")
2773 (match_operand:V4SF 1 "vsx_register_operand")]
2774 "VECTOR_UNIT_VSX_P (V2DFmode)"
2776 if (BYTES_BIG_ENDIAN)
2778 /* Shift left one word to put even word in correct location. */
2779 rtx rtx_tmp = gen_reg_rtx (V4SFmode);
2780 rtx rtx_val = GEN_INT (4);
2781 emit_insn (gen_altivec_vsldoi_v4sf (rtx_tmp, operands[1], operands[1],
2783 emit_insn (gen_vsx_xvcvspuxds_be (operands[0], rtx_tmp));
2786 emit_insn (gen_vsx_xvcvspuxds_le (operands[0], operands[1]));
2791 ;; Generate float2 double
2792 ;; convert two double to float
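;; A hedged usage sketch (assuming the vec_float2 built-in described in the
;; PowerPC vector programming interfaces; the function name is illustrative):
;;
;;   #include <altivec.h>
;;
;;   vector float
;;   pack_two (vector double a, vector double b)
;;   {
;;     return vec_float2 (a, b);   /* maps onto float2_v2df below  */
;;   }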
2793 (define_expand "float2_v2df"
2794 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2795 (use (match_operand:V2DF 1 "register_operand" "wa"))
2796 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2797 "VECTOR_UNIT_VSX_P (V4SFmode)"
2799 rtx rtx_src1, rtx_src2, rtx_dst;
2801 rtx_dst = operands[0];
2802 rtx_src1 = operands[1];
2803 rtx_src2 = operands[2];
2805 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2810 ;; convert two long long signed ints to float
2811 (define_expand "float2_v2di"
2812 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2813 (use (match_operand:V2DI 1 "register_operand" "wa"))
2814 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2815 "VECTOR_UNIT_VSX_P (V4SFmode)"
2817 rtx rtx_src1, rtx_src2, rtx_dst;
2819 rtx_dst = operands[0];
2820 rtx_src1 = operands[1];
2821 rtx_src2 = operands[2];
2823 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2827 ;; Generate uns_float2
2828 ;; convert two long long unsigned ints to float
2829 (define_expand "uns_float2_v2di"
2830 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2831 (use (match_operand:V2DI 1 "register_operand" "wa"))
2832 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2833 "VECTOR_UNIT_VSX_P (V4SFmode)"
2835 rtx rtx_src1, rtx_src2, rtx_dst;
2837 rtx_dst = operands[0];
2838 rtx_src1 = operands[1];
2839 rtx_src2 = operands[2];
2841 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2846 ;; convert double or long long signed to float
2847 ;; (Only even words are valid, BE numbering)
2848 (define_expand "floate<mode>"
2849 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2850 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2851 "VECTOR_UNIT_VSX_P (V4SFmode)"
2853 if (BYTES_BIG_ENDIAN)
2855 /* Shift left one word to put even word correct location */
2857 rtx rtx_val = GEN_INT (4);
2859 rtx_tmp = gen_reg_rtx (V4SFmode);
2860 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2861 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2862 rtx_tmp, rtx_tmp, rtx_val));
2865 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2870 ;; Generate uns_floate
2871 ;; convert long long unsigned to float
2872 ;; (Only even words are valid, BE numbering)
2873 (define_expand "unsfloatev2di"
2874 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2875 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2876 "VECTOR_UNIT_VSX_P (V4SFmode)"
2878 if (BYTES_BIG_ENDIAN)
2880 /* Shift left one word to put even word correct location */
2882 rtx rtx_val = GEN_INT (4);
2884 rtx_tmp = gen_reg_rtx (V4SFmode);
2885 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2886 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2887 rtx_tmp, rtx_tmp, rtx_val));
2890 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2896 ;; convert double or long long signed to float
2897 ;; (Only odd words are valid, BE numbering)
2898 (define_expand "floato<mode>"
2899 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2900 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2901 "VECTOR_UNIT_VSX_P (V4SFmode)"
2903 if (BYTES_BIG_ENDIAN)
2904 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2907 /* Shift left one word to put odd word correct location */
2909 rtx rtx_val = GEN_INT (4);
2911 rtx_tmp = gen_reg_rtx (V4SFmode);
2912 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2913 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2914 rtx_tmp, rtx_tmp, rtx_val));
2919 ;; Generate uns_floato
2920 ;; convert long long unsigned to float
2921 ;; (Only odd words are valid, BE numbering)
2922 (define_expand "unsfloatov2di"
2923 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2924 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2925 "VECTOR_UNIT_VSX_P (V4SFmode)"
2927 if (BYTES_BIG_ENDIAN)
2928 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2931 /* Shift left one word to put odd word correct location */
2933 rtx rtx_val = GEN_INT (4);
2935 rtx_tmp = gen_reg_rtx (V4SFmode);
2936 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2937 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2938 rtx_tmp, rtx_tmp, rtx_val));
2943 ;; Generate vsigned2
2944 ;; convert two double float vectors to a vector of single precision ints
2945 (define_expand "vsigned2_v2df"
2946 [(match_operand:V4SI 0 "register_operand" "=wa")
2947 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2948 (match_operand:V2DF 2 "register_operand" "wa")]
2949 UNSPEC_VSX_VSIGNED2)]
2952 rtx rtx_src1, rtx_src2, rtx_dst;
2953 bool signed_convert=true;
2955 rtx_dst = operands[0];
2956 rtx_src1 = operands[1];
2957 rtx_src2 = operands[2];
2959 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2963 ;; Generate vsignedo_v2df
2964 ;; signed double float to int convert odd word
2965 (define_expand "vsignedo_v2df"
2966 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2967 (match_operand:V2DF 1 "register_operand" "wa"))]
2970 if (BYTES_BIG_ENDIAN)
2973 rtx rtx_val = GEN_INT (12);
2974 rtx_tmp = gen_reg_rtx (V4SImode);
2976 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2978 /* Big endian word numbering for words in operand is 0 1 2 3.
2979 take (operand[1] operand[1]) and shift left one word
2980 0 1 2 3 0 1 2 3 => 1 2 3 0
2981 Words 1 and 3 are now where they need to be for the result. */
2983 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2987 /* Little endian word numbering for operand is 3 2 1 0.
2988 Result words 3 and 1 are where they need to be. */
2989 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2993 [(set_attr "type" "veccomplex")])
2995 ;; Generate vsignede_v2df
2996 ;; signed double float to int even word
2997 (define_expand "vsignede_v2df"
2998 [(set (match_operand:V4SI 0 "register_operand" "=v")
2999 (match_operand:V2DF 1 "register_operand" "v"))]
3002 if (BYTES_BIG_ENDIAN)
3003 /* Big endian word numbering for words in operand is 0 1 2 3.
3004 Result words 0 and 2 are already where they need to be. */
3005 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
3010 rtx rtx_val = GEN_INT (12);
3011 rtx_tmp = gen_reg_rtx (V4SImode);
3013 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
3015 /* Little endian word numbering for operand is 3 2 1 0.
3016 take (operand[1] operand[1]) and shift left three words
3017 0 1 2 3 0 1 2 3 => 3 0 1 2
3018 Words 0 and 2 are now where they need to be for the result. */
3019 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
3024 [(set_attr "type" "veccomplex")])
3026 ;; Generate vunsigned2
3027 ;; convert two double float vectors to a vector of single precision
3028 ;; unsigned ints
3029 (define_expand "vunsigned2_v2df"
3030 [(match_operand:V4SI 0 "register_operand" "=v")
3031 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
3032 (match_operand:V2DF 2 "register_operand" "v")]
3033 UNSPEC_VSX_VSIGNED2)]
3036 rtx rtx_src1, rtx_src2, rtx_dst;
3037 bool signed_convert=false;
3039 rtx_dst = operands[0];
3040 rtx_src1 = operands[1];
3041 rtx_src2 = operands[2];
3043 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
3047 ;; Generate vunsignedo_v2df
3048 ;; unsigned double float to int convert odd word
3049 (define_expand "vunsignedo_v2df"
3050 [(set (match_operand:V4SI 0 "register_operand" "=v")
3051 (match_operand:V2DF 1 "register_operand" "v"))]
3054 if (BYTES_BIG_ENDIAN)
3057 rtx rtx_val = GEN_INT (12);
3058 rtx_tmp = gen_reg_rtx (V4SImode);
3060 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
3062 /* Big endian word numbering for words in operand is 0 1 2 3.
3063 take (operand[1] operand[1]) and shift left one word
3064 0 1 2 3 0 1 2 3 => 1 2 3 0
3065 Words 1 and 3 are now where they need to be for the result. */
3067 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
3071 /* Little endian word numbering for operand is 3 2 1 0.
3072 Result words 3 and 1 are where they need to be. */
3073 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
3077 [(set_attr "type" "veccomplex")])
3079 ;; Generate vunsignede_v2df
3080 ;; unsigned double float to int even word
3081 (define_expand "vunsignede_v2df"
3082 [(set (match_operand:V4SI 0 "register_operand" "=v")
3083 (match_operand:V2DF 1 "register_operand" "v"))]
3086 if (BYTES_BIG_ENDIAN)
3087 /* Big endian word numbering for words in operand is 0 1 2 3.
3088 Result words 0 and 2 are already where they need to be. */
3089 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
3094 rtx rtx_val = GEN_INT (12);
3095 rtx_tmp = gen_reg_rtx (V4SImode);
3097 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
3099 /* Little endian word numbering for operand is 3 2 1 0.
3100 take (operand[1] operand[1]) and shift left three words
3101 0 1 2 3 0 1 2 3 => 3 0 1 2
3102 Words 0 and 2 are now where they need to be for the result. */
3103 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
3108 [(set_attr "type" "veccomplex")])
3110 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
3111 ;; the xvrdpiz instruction does not truncate the value if the floating
3112 ;; point value is < LONG_MIN or > LONG_MAX.
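;; A minimal C sketch (illustrative only) of the round-trip this pattern
;; matches; the single xvrdpiz form is only used under the conditions above:
;;
;;   typedef double    v2df __attribute__ ((vector_size (16)));
;;   typedef long long v2di __attribute__ ((vector_size (16)));
;;
;;   v2df
;;   trunc_values (v2df x)
;;   {
;;     return __builtin_convertvector (__builtin_convertvector (x, v2di), v2df);
;;   }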
3113 (define_insn "*vsx_float_fix_v2df2"
3114 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
3115 (float:V2DF
3116 (fix:V2DI
3117 (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
3118 "TARGET_HARD_FLOAT
3119 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
3120 && !flag_trapping_math && TARGET_FRIZ"
3121 "xvrdpiz %x0,%x1"
3122 [(set_attr "type" "vecdouble")])
3125 ;; Permute operations
3127 ;; Build a V2DF/V2DI vector from two scalars
3128 (define_insn "vsx_concat_<mode>"
3129 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
3130 (vec_concat:VSX_D
3131 (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa,b")
3132 (match_operand:<VEC_base> 2 "gpc_reg_operand" "wa,b")))]
3133 "VECTOR_MEM_VSX_P (<MODE>mode)"
3135 if (which_alternative == 0)
3136 return (BYTES_BIG_ENDIAN
3137 ? "xxpermdi %x0,%x1,%x2,0"
3138 : "xxpermdi %x0,%x2,%x1,0");
3140 else if (which_alternative == 1)
3141 return (BYTES_BIG_ENDIAN
3142 ? "mtvsrdd %x0,%1,%2"
3143 : "mtvsrdd %x0,%2,%1");
3148 [(set_attr "type" "vecperm,vecmove")])
3150 ;; Combiner patterns to allow creating XXPERMDI's to access either double
3151 ;; word element in a vector register.
3152 (define_insn "*vsx_concat_<mode>_1"
3153 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3154 (vec_concat:VSX_D
3155 (vec_select:<VEC_base>
3156 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
3157 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
3158 (match_operand:<VEC_base> 3 "gpc_reg_operand" "wa")))]
3159 "VECTOR_MEM_VSX_P (<MODE>mode)"
3161 HOST_WIDE_INT dword = INTVAL (operands[2]);
3162 if (BYTES_BIG_ENDIAN)
3164 operands[4] = GEN_INT (2*dword);
3165 return "xxpermdi %x0,%x1,%x3,%4";
3169 operands[4] = GEN_INT (!dword);
3170 return "xxpermdi %x0,%x3,%x1,%4";
3173 [(set_attr "type" "vecperm")])
3175 (define_insn "*vsx_concat_<mode>_2"
3176 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3177 (vec_concat:VSX_D
3178 (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa")
3179 (vec_select:<VEC_base>
3180 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
3181 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
3182 "VECTOR_MEM_VSX_P (<MODE>mode)"
3184 HOST_WIDE_INT dword = INTVAL (operands[3]);
3185 if (BYTES_BIG_ENDIAN)
3187 operands[4] = GEN_INT (dword);
3188 return "xxpermdi %x0,%x1,%x2,%4";
3192 operands[4] = GEN_INT (2 * !dword);
3193 return "xxpermdi %x0,%x2,%x1,%4";
3196 [(set_attr "type" "vecperm")])
3198 (define_insn "*vsx_concat_<mode>_3"
3199 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3200 (vec_concat:VSX_D
3201 (vec_select:<VEC_base>
3202 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
3203 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
3204 (vec_select:<VEC_base>
3205 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
3206 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
3207 "VECTOR_MEM_VSX_P (<MODE>mode)"
3209 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
3210 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
3211 if (BYTES_BIG_ENDIAN)
3213 operands[5] = GEN_INT ((2 * dword1) + dword2);
3214 return "xxpermdi %x0,%x1,%x3,%5";
3218 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
3219 return "xxpermdi %x0,%x3,%x1,%5";
3222 [(set_attr "type" "vecperm")])
3224 ;; Special purpose concat using xxpermdi to glue two single precision values
3225 ;; together, relying on the fact that internally scalar floats are represented
3226 ;; as doubles. This is used to initialize a V4SF vector with 4 floats
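;; For reference, a hedged C sketch of a V4SF initialization that goes
;; through this kind of concatenation (typedef and function name are
;; illustrative only):
;;
;;   typedef float v4sf __attribute__ ((vector_size (16)));
;;
;;   v4sf
;;   build (float a, float b, float c, float d)
;;   {
;;     return (v4sf) { a, b, c, d };
;;   }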
3227 (define_insn "vsx_concat_v2sf"
3228 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
3229 (unspec:V2DF
3230 [(match_operand:SF 1 "vsx_register_operand" "wa")
3231 (match_operand:SF 2 "vsx_register_operand" "wa")]
3232 UNSPEC_VSX_CONCAT))]
3233 "VECTOR_MEM_VSX_P (V2DFmode)"
3235 if (BYTES_BIG_ENDIAN)
3236 return "xxpermdi %x0,%x1,%x2,0";
3238 return "xxpermdi %x0,%x2,%x1,0";
3240 [(set_attr "type" "vecperm")])
3242 ;; Concatenate 4 SImode elements into a V4SImode reg.
3243 (define_expand "vsx_init_v4si"
3244 [(use (match_operand:V4SI 0 "gpc_reg_operand"))
3245 (use (match_operand:SI 1 "gpc_reg_operand"))
3246 (use (match_operand:SI 2 "gpc_reg_operand"))
3247 (use (match_operand:SI 3 "gpc_reg_operand"))
3248 (use (match_operand:SI 4 "gpc_reg_operand"))]
3249 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3251 rtx a = gen_lowpart_SUBREG (DImode, operands[1]);
3252 rtx b = gen_lowpart_SUBREG (DImode, operands[2]);
3253 rtx c = gen_lowpart_SUBREG (DImode, operands[3]);
3254 rtx d = gen_lowpart_SUBREG (DImode, operands[4]);
3255 if (!BYTES_BIG_ENDIAN)
3261 rtx ab = gen_reg_rtx (DImode);
3262 rtx cd = gen_reg_rtx (DImode);
3263 emit_insn (gen_rotldi3_insert_3 (ab, a, GEN_INT (32), b,
3264 GEN_INT (0xffffffff)));
3265 emit_insn (gen_rotldi3_insert_3 (cd, c, GEN_INT (32), d,
3266 GEN_INT (0xffffffff)));
3268 rtx abcd = gen_reg_rtx (V2DImode);
3269 emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
3270 emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
3274 ;; xxpermdi for little endian loads and stores. We need several of
3275 ;; these since the form of the PARALLEL differs by mode.
3276 (define_insn "*vsx_xxpermdi2_le_<mode>"
3277 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3278 (vec_select:VSX_D
3279 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3280 (parallel [(const_int 1) (const_int 0)])))]
3281 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3282 "xxpermdi %x0,%x1,%x1,2"
3283 [(set_attr "type" "vecperm")])
3285 (define_insn "xxswapd_v16qi"
3286 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3287 (vec_select:V16QI
3288 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3289 (parallel [(const_int 8) (const_int 9)
3290 (const_int 10) (const_int 11)
3291 (const_int 12) (const_int 13)
3292 (const_int 14) (const_int 15)
3293 (const_int 0) (const_int 1)
3294 (const_int 2) (const_int 3)
3295 (const_int 4) (const_int 5)
3296 (const_int 6) (const_int 7)])))]
3297 "TARGET_VSX"
3298 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3299 ;; mnemonic xxpermdi instead.
3300 "xxpermdi %x0,%x1,%x1,2"
3301 [(set_attr "type" "vecperm")])
3303 (define_insn "xxswapd_v8hi"
3304 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3305 (vec_select:V8HI
3306 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3307 (parallel [(const_int 4) (const_int 5)
3308 (const_int 6) (const_int 7)
3309 (const_int 0) (const_int 1)
3310 (const_int 2) (const_int 3)])))]
3311 "TARGET_VSX"
3312 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3313 ;; mnemonic xxpermdi instead.
3314 "xxpermdi %x0,%x1,%x1,2"
3315 [(set_attr "type" "vecperm")])
3317 (define_insn "xxswapd_<mode>"
3318 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3319 (vec_select:VSX_W
3320 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3321 (parallel [(const_int 2) (const_int 3)
3322 (const_int 0) (const_int 1)])))]
3323 "TARGET_VSX"
3324 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3325 ;; mnemonic xxpermdi instead.
3326 "xxpermdi %x0,%x1,%x1,2"
3327 [(set_attr "type" "vecperm")])
3329 (define_insn "xxswapd_<mode>"
3330 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3331 (vec_select:VSX_D
3332 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3333 (parallel [(const_int 1) (const_int 0)])))]
3334 "TARGET_VSX"
3335 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3336 ;; mnemonic xxpermdi instead.
3337 "xxpermdi %x0,%x1,%x1,2"
3338 [(set_attr "type" "vecperm")])
3340 ;; Swap upper/lower 64-bit values in a 128-bit vector
3341 (define_insn "xxswapd_v1ti"
3342 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
3346 (match_operand:V1TI 1 "vsx_register_operand" "v") 0 )
3347 (parallel [(const_int 1)(const_int 0)]))
3350 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3351 ;; mnemonic xxpermdi instead.
3352 "xxpermdi %x0,%x1,%x1,2"
3353 [(set_attr "type" "vecperm")])
3355 (define_insn "xxgenpcvm_<mode>_internal"
3356 [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa")
3357 (unspec:VSX_EXTRACT_I4
3358 [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v")
3359 (match_operand:QI 2 "const_0_to_3_operand" "n")]
3360 UNSPEC_XXGENPCV))]
3361 "TARGET_POWER10"
3362 "xxgenpcv<wd>m %x0,%1,%2"
3363 [(set_attr "type" "vecsimple")])
3365 (define_expand "xxgenpcvm_<mode>"
3366 [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand"))
3367 (use (match_operand:VSX_EXTRACT_I4 1 "register_operand"))
3368 (use (match_operand:QI 2 "immediate_operand"))]
3371 if (!BYTES_BIG_ENDIAN)
3373 /* gen_xxgenpcvm assumes Big Endian order. If LE,
3374 swap the upper and lower double words. */
3375 rtx tmp = gen_reg_rtx (<MODE>mode);
3377 emit_insn (gen_xxswapd_<mode> (tmp, operands[1]));
3380 emit_insn (gen_xxgenpcvm_<mode>_internal (operands[0], operands[1],
3385 ;; lxvd2x for little endian loads. We need several of
3386 ;; these since the form of the PARALLEL differs by mode.
3387 (define_insn "*vsx_lxvd2x2_le_<mode>"
3388 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3389 (vec_select:VSX_D
3390 (match_operand:VSX_D 1 "memory_operand" "Z")
3391 (parallel [(const_int 1) (const_int 0)])))]
3392 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3393 "lxvd2x %x0,%y1"
3394 [(set_attr "type" "vecload")])
3396 (define_insn "*vsx_lxvd2x4_le_<mode>"
3397 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3398 (vec_select:VSX_W
3399 (match_operand:VSX_W 1 "memory_operand" "Z")
3400 (parallel [(const_int 2) (const_int 3)
3401 (const_int 0) (const_int 1)])))]
3402 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3403 "lxvd2x %x0,%y1"
3404 [(set_attr "type" "vecload")])
3406 (define_insn "*vsx_lxvd2x8_le_V8HI"
3407 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3409 (match_operand:V8HI 1 "memory_operand" "Z")
3410 (parallel [(const_int 4) (const_int 5)
3411 (const_int 6) (const_int 7)
3412 (const_int 0) (const_int 1)
3413 (const_int 2) (const_int 3)])))]
3414 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3416 [(set_attr "type" "vecload")])
3418 (define_insn "*vsx_lxvd2x16_le_V16QI"
3419 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3421 (match_operand:V16QI 1 "memory_operand" "Z")
3422 (parallel [(const_int 8) (const_int 9)
3423 (const_int 10) (const_int 11)
3424 (const_int 12) (const_int 13)
3425 (const_int 14) (const_int 15)
3426 (const_int 0) (const_int 1)
3427 (const_int 2) (const_int 3)
3428 (const_int 4) (const_int 5)
3429 (const_int 6) (const_int 7)])))]
3430 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3432 [(set_attr "type" "vecload")])
3434 ;; stxvd2x for little endian stores. We need several of
3435 ;; these since the form of the PARALLEL differs by mode.
3436 (define_insn "*vsx_stxvd2x2_le_<mode>"
3437 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3438 (vec_select:VSX_D
3439 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3440 (parallel [(const_int 1) (const_int 0)])))]
3441 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3442 "stxvd2x %x1,%y0"
3443 [(set_attr "type" "vecstore")])
3445 (define_insn "*vsx_stxvd2x4_le_<mode>"
3446 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3447 (vec_select:VSX_W
3448 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3449 (parallel [(const_int 2) (const_int 3)
3450 (const_int 0) (const_int 1)])))]
3451 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3452 "stxvd2x %x1,%y0"
3453 [(set_attr "type" "vecstore")])
3455 (define_insn_and_split "vsx_stxvd2x4_le_const_<mode>"
3456 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3457 (match_operand:VSX_W 1 "immediate_operand" "W"))]
3459 && VECTOR_MEM_VSX_P (<MODE>mode)
3460 && !TARGET_P9_VECTOR
3461 && const_vec_duplicate_p (operands[1])
3462 && can_create_pseudo_p ()"
3470 (parallel [(const_int 2) (const_int 3)
3471 (const_int 0) (const_int 1)])))]
3473 /* Here all the constants must be loaded without memory. */
3474 gcc_assert (easy_altivec_constant (operands[1], <MODE>mode));
3475 operands[2] = gen_reg_rtx (<MODE>mode);
3477 [(set_attr "type" "vecstore")
3478 (set_attr "length" "8")])
3480 (define_insn "*vsx_stxvd2x8_le_V8HI"
3481 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3483 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3484 (parallel [(const_int 4) (const_int 5)
3485 (const_int 6) (const_int 7)
3486 (const_int 0) (const_int 1)
3487 (const_int 2) (const_int 3)])))]
3488 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3490 [(set_attr "type" "vecstore")])
3492 (define_insn "*vsx_stxvd2x16_le_V16QI"
3493 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3495 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3496 (parallel [(const_int 8) (const_int 9)
3497 (const_int 10) (const_int 11)
3498 (const_int 12) (const_int 13)
3499 (const_int 14) (const_int 15)
3500 (const_int 0) (const_int 1)
3501 (const_int 2) (const_int 3)
3502 (const_int 4) (const_int 5)
3503 (const_int 6) (const_int 7)])))]
3504 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3506 [(set_attr "type" "vecstore")])
3508 ;; Convert a TImode value into V1TImode
3509 (define_expand "vsx_set_v1ti"
3510 [(match_operand:V1TI 0 "nonimmediate_operand")
3511 (match_operand:V1TI 1 "nonimmediate_operand")
3512 (match_operand:TI 2 "input_operand")
3513 (match_operand:QI 3 "u5bit_cint_operand")]
3514 "VECTOR_MEM_VSX_P (V1TImode)"
3516 if (operands[3] != const0_rtx)
3519 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
3523 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
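;; A hedged C sketch of the user-level operation this expander serves
;; (vec_insert is the AltiVec/VSX intrinsic; the function name is an example):
;;
;;   #include <altivec.h>
;;
;;   vector double
;;   set_element (vector double v, double x, int i)
;;   {
;;     return vec_insert (x, v, i);
;;   }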
3524 (define_expand "vsx_set_<mode>"
3525 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3526 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3527 (use (match_operand:<VEC_base> 2 "gpc_reg_operand"))
3528 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3529 "VECTOR_MEM_VSX_P (<MODE>mode)"
3531 rtx dest = operands[0];
3532 rtx vec_reg = operands[1];
3533 rtx value = operands[2];
3534 rtx ele = operands[3];
3535 rtx tmp = gen_reg_rtx (<VEC_base>mode);
3537 if (ele == const0_rtx)
3539 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3540 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3543 else if (ele == const1_rtx)
3545 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3546 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3553 ;; Extract a DF/DI element from V2DF/V2DI
3554 ;; Optimize cases where we can do a simple or direct move.
3555 ;; Or see if we can avoid doing the move at all
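;; A hedged C sketch of the corresponding user-level extract (vec_extract is
;; the AltiVec/VSX intrinsic; the function name is an example):
;;
;;   #include <altivec.h>
;;
;;   double
;;   get_element (vector double v, int i)
;;   {
;;     return vec_extract (v, i);
;;   }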
3557 (define_expand "vsx_extract_<mode>"
3558 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand")
3559 (vec_select:<VEC_base>
3560 (match_operand:VSX_D 1 "gpc_reg_operand")
3562 [(match_operand:QI 2 "const_0_to_1_operand")])))]
3563 "VECTOR_MEM_VSX_P (<MODE>mode)"
3566 (define_insn "*vsx_extract_<mode>_0"
3567 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,wa,wr")
3568 (vec_select:<VEC_base>
3569 (match_operand:VSX_D 1 "gpc_reg_operand" "0,wa,wa")
3571 [(match_operand:QI 2 "const_0_to_1_operand" "n,n,n")])))]
3572 "VECTOR_MEM_VSX_P (<MODE>mode)
3573 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 0 : 1)"
3575 if (which_alternative == 0)
3576 return ASM_COMMENT_START " vec_extract to same register";
3578 if (which_alternative == 2)
3579 return "mfvsrd %0,%x1";
3581 return "xxlor %x0,%x1,%x1";
3583 [(set_attr "type" "*,veclogical,mfvsr")
3584 (set_attr "isa" "*,*,p8v")
3585 (set_attr "length" "0,*,*")])
3587 (define_insn "*vsx_extract_<mode>_1"
3588 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,wr")
3589 (vec_select:<VEC_base>
3590 (match_operand:VSX_D 1 "gpc_reg_operand" "wa,wa")
3592 [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))]
3593 "VECTOR_MEM_VSX_P (<MODE>mode)
3594 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 0)"
3596 if (which_alternative == 1)
3597 return "mfvsrld %0,%x1";
3599 operands[3] = GEN_INT (BYTES_BIG_ENDIAN ? 2 : 3);
3600 return "xxpermdi %x0,%x1,%x1,%3";
3602 [(set_attr "type" "mfvsr,vecperm")
3603 (set_attr "isa" "*,p9v")])
3605 ;; Optimize extracting a single scalar element from memory.
3606 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3607 [(set (match_operand:<VEC_base> 0 "register_operand" "=wa,wr")
3608 (vec_select:<VSX_D:VEC_base>
3609 (match_operand:VSX_D 1 "memory_operand" "m,m")
3610 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3611 (clobber (match_scratch:P 3 "=&b,&b"))]
3612 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3614 "&& reload_completed"
3615 [(set (match_dup 0) (match_dup 4))]
3617 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3618 operands[3], <VSX_D:VEC_base>mode);
3620 [(set_attr "type" "fpload,load")
3621 (set_attr "length" "8")])
3623 ;; Optimize storing a single scalar element that is the right location to
3624 ;; memory
3625 (define_insn "*vsx_extract_<mode>_store"
3626 [(set (match_operand:<VEC_base> 0 "memory_operand" "=m,Z,wY")
3627 (vec_select:<VEC_base>
3628 (match_operand:VSX_D 1 "register_operand" "d,v,v")
3629 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "n,n,n")])))]
3630 "VECTOR_MEM_VSX_P (<MODE>mode)
3631 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 0 : 1)"
3632 "@
3633 stfd%U0%X0 %1,%0
3634 stxsdx %x1,%y0
3635 stxsd %1,%0"
3636 [(set_attr "type" "fpstore")
3637 (set_attr "isa" "*,p7v,p9v")])
3639 ;; Variable V2DI/V2DF extract shift
3640 (define_insn "vsx_vslo_<mode>"
3641 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=v")
3642 (unspec:<VEC_base> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3643 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3644 UNSPEC_VSX_VSLO))]
3645 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3646 "vslo %0,%1,%2"
3647 [(set_attr "type" "vecperm")])
3649 ;; Variable V2DI/V2DF extract from a register
3650 (define_insn_and_split "vsx_extract_<mode>_var"
3651 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=v")
3652 (unspec:<VEC_base> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3653 (match_operand:DI 2 "gpc_reg_operand" "r")]
3654 UNSPEC_VSX_EXTRACT))
3655 (clobber (match_scratch:DI 3 "=r"))
3656 (clobber (match_scratch:V2DI 4 "=&v"))]
3657 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3659 "&& reload_completed"
3662 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3663 operands[3], operands[4]);
3667 ;; Variable V2DI/V2DF extract from memory
3668 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3669 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,r")
3670 (unspec:<VEC_base> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
3671 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3672 UNSPEC_VSX_EXTRACT))
3673 (clobber (match_scratch:DI 3 "=&b,&b"))]
3674 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3676 "&& reload_completed"
3677 [(set (match_dup 0) (match_dup 4))]
3679 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3680 operands[3], <VEC_base>mode);
3682 [(set_attr "type" "fpload,load")])
3684 ;; Extract a SF element from V4SF
3685 (define_insn_and_split "vsx_extract_v4sf"
3686 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
3688 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3689 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3690 (clobber (match_scratch:V4SF 3 "=0"))]
3691 "VECTOR_UNIT_VSX_P (V4SFmode)"
3696 rtx op0 = operands[0];
3697 rtx op1 = operands[1];
3698 rtx op2 = operands[2];
3699 rtx op3 = operands[3];
3701 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3707 if (GET_CODE (op3) == SCRATCH)
3708 op3 = gen_reg_rtx (V4SFmode);
3709 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3712 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3715 [(set_attr "length" "8")
3716 (set_attr "type" "fp")])
3718 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3719 [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
3721 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3722 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3723 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3724 "VECTOR_MEM_VSX_P (V4SFmode)"
3726 "&& reload_completed"
3727 [(set (match_dup 0) (match_dup 4))]
3729 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3730 operands[3], SFmode);
3732 [(set_attr "type" "fpload,fpload,fpload,load")
3733 (set_attr "length" "8")
3734 (set_attr "isa" "*,p7v,p9v,*")])
3736 ;; Variable V4SF extract from a register
3737 (define_insn_and_split "vsx_extract_v4sf_var"
3738 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
3739 (unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v")
3740 (match_operand:DI 2 "gpc_reg_operand" "r")]
3741 UNSPEC_VSX_EXTRACT))
3742 (clobber (match_scratch:DI 3 "=r"))
3743 (clobber (match_scratch:V2DI 4 "=&v"))]
3744 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3746 "&& reload_completed"
3749 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3750 operands[3], operands[4]);
3754 ;; Variable V4SF extract from memory
3755 (define_insn_and_split "*vsx_extract_v4sf_var_load"
3756 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r")
3757 (unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q")
3758 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3759 UNSPEC_VSX_EXTRACT))
3760 (clobber (match_scratch:DI 3 "=&b,&b"))]
3761 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3763 "&& reload_completed"
3764 [(set (match_dup 0) (match_dup 4))]
3766 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3767 operands[3], SFmode);
3769 [(set_attr "type" "fpload,load")])
3771 ;; Expand the builtin form of xxpermdi to canonical rtl.
3772 (define_expand "vsx_xxpermdi_<mode>"
3773 [(match_operand:VSX_L 0 "vsx_register_operand")
3774 (match_operand:VSX_L 1 "vsx_register_operand")
3775 (match_operand:VSX_L 2 "vsx_register_operand")
3776 (match_operand:QI 3 "u5bit_cint_operand")]
3777 "VECTOR_MEM_VSX_P (<MODE>mode)"
3779 rtx target = operands[0];
3780 rtx op0 = operands[1];
3781 rtx op1 = operands[2];
3782 int mask = INTVAL (operands[3]);
3783 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3784 rtx perm1 = GEN_INT ((mask & 1) + 2);
3785 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3787 if (<MODE>mode == V2DFmode)
3788 gen = gen_vsx_xxpermdi2_v2df_1;
3791 gen = gen_vsx_xxpermdi2_v2di_1;
3792 if (<MODE>mode != V2DImode)
3794 target = gen_lowpart (V2DImode, target);
3795 op0 = gen_lowpart (V2DImode, op0);
3796 op1 = gen_lowpart (V2DImode, op1);
3799 emit_insn (gen (target, op0, op1, perm0, perm1));
3803 ;; Special version of xxpermdi that retains big-endian semantics.
3804 (define_expand "vsx_xxpermdi_<mode>_be"
3805 [(match_operand:VSX_L 0 "vsx_register_operand")
3806 (match_operand:VSX_L 1 "vsx_register_operand")
3807 (match_operand:VSX_L 2 "vsx_register_operand")
3808 (match_operand:QI 3 "u5bit_cint_operand")]
3809 "VECTOR_MEM_VSX_P (<MODE>mode)"
3811 rtx target = operands[0];
3812 rtx op0 = operands[1];
3813 rtx op1 = operands[2];
3814 int mask = INTVAL (operands[3]);
3815 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3816 rtx perm1 = GEN_INT ((mask & 1) + 2);
3817 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3819 if (<MODE>mode == V2DFmode)
3820 gen = gen_vsx_xxpermdi2_v2df_1;
3823 gen = gen_vsx_xxpermdi2_v2di_1;
3824 if (<MODE>mode != V2DImode)
3826 target = gen_lowpart (V2DImode, target);
3827 op0 = gen_lowpart (V2DImode, op0);
3828 op1 = gen_lowpart (V2DImode, op1);
3831 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3832 transformation we don't want; it is necessary for
3833 rs6000_expand_vec_perm_const_1 but not for this use. So we
3834 prepare for that by reversing the transformation here. */
3835 if (BYTES_BIG_ENDIAN)
3836 emit_insn (gen (target, op0, op1, perm0, perm1));
3839 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3840 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3841 emit_insn (gen (target, op1, op0, p0, p1));
3846 (define_insn "vsx_xxpermdi2_<mode>_1"
3847 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3849 (vec_concat:<VS_double>
3850 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3851 (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
3852 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3853 (match_operand 4 "const_2_to_3_operand" "")])))]
3854 "VECTOR_MEM_VSX_P (<MODE>mode)"
3858 /* For little endian, swap operands and invert/swap selectors
3859 to get the correct xxpermdi. The operand swap sets up the
3860 inputs as a little endian array. The selectors are swapped
3861 because they are defined to use big endian ordering. The
3862 selectors are inverted to get the correct doublewords for
3863 little endian ordering. */
3864 if (BYTES_BIG_ENDIAN)
3866 op3 = INTVAL (operands[3]);
3867 op4 = INTVAL (operands[4]);
3871 op3 = 3 - INTVAL (operands[4]);
3872 op4 = 3 - INTVAL (operands[3]);
3875 mask = (op3 << 1) | (op4 - 2);
3876 operands[3] = GEN_INT (mask);
3878 if (BYTES_BIG_ENDIAN)
3879 return "xxpermdi %x0,%x1,%x2,%3";
3881 return "xxpermdi %x0,%x2,%x1,%3";
3883 [(set_attr "type" "vecperm")])
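;; A worked example of the selector encoding above (illustrative only): on a
;; big-endian target, operands[3] = 0 and operands[4] = 3 select doubleword 0
;; of operand 1 and doubleword 1 of operand 2, giving
;;   mask = (0 << 1) | (3 - 2) = 1
;; so the emitted instruction is "xxpermdi %x0,%x1,%x2,1".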
3885 ;; Extraction of a single element in a small integer vector. Until ISA 3.0,
3886 ;; none of the small types were allowed in a vector register, so we had to
3887 ;; extract to a DImode and either do a direct move or store.
3888 (define_expand "vsx_extract_<mode>"
3889 [(parallel [(set (match_operand:<VEC_base> 0 "gpc_reg_operand")
3890 (vec_select:<VEC_base>
3891 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand")
3892 (parallel [(match_operand:QI 2 "const_int_operand")])))
3893 (clobber (match_scratch:VSX_EXTRACT_I2 3))])]
3894 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3896 /* If we have ISA 3.0, we can do an xxextractuw/vextractu{b,h}. */
3897 if (TARGET_P9_VECTOR)
3899 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3905 (define_expand "vsx_extract_v4si"
3906 [(parallel [(set (match_operand:SI 0 "gpc_reg_operand")
3908 (match_operand:V4SI 1 "gpc_reg_operand")
3909 (parallel [(match_operand:QI 2 "const_0_to_3_operand")])))
3910 (clobber (match_scratch:V4SI 3))])]
3911 "TARGET_DIRECT_MOVE_64BIT"
3913 /* Word 1 (in BE order) can be extracted by mfvsrwz/stxsiwx, so just
3914 fall through to vsx_extract_v4si_w1. */
3915 if (TARGET_P9_VECTOR
3916 && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))
3918 emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1],
3924 ;; Extract from word 1 (BE order).
3925 (define_insn "vsx_extract_v4si_w1"
3926 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
3928 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
3929 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3930 (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
3931 "TARGET_DIRECT_MOVE_64BIT
3932 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
3934 if (which_alternative == 0)
3935 return "mfvsrwz %0,%x1";
3937 if (which_alternative == 1)
3938 return "xxlor %x0,%x1,%x1";
3940 if (which_alternative == 2)
3941 return "stxsiwx %x1,%y0";
3943 return ASM_COMMENT_START " vec_extract to same register";
3945 [(set_attr "type" "mfvsr,veclogical,fpstore,*")
3946 (set_attr "length" "4,4,4,0")
3947 (set_attr "isa" "p8v,*,p8v,*")])
3949 (define_insn "*mfvsrwz"
3950 [(set (match_operand:DI 0 "register_operand" "=r")
3953 (match_operand:V4SI 1 "vsx_register_operand" "wa")
3954 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3955 (clobber (match_scratch:V4SI 3 "=v"))]
3956 "TARGET_DIRECT_MOVE_64BIT
3957 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
3959 [(set_attr "type" "mfvsr")
3960 (set_attr "isa" "p8v")])
3962 (define_insn "vsx_extract_<mode>_p9"
3963 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3964 (vec_select:<VEC_base>
3965 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3966 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3967 (clobber (match_scratch:SI 3 "=r,X"))]
3968 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3970 if (which_alternative == 0)
3975 HOST_WIDE_INT elt = INTVAL (operands[2]);
3976 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3977 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3980 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3981 HOST_WIDE_INT offset = unit_size * elt_adj;
3983 operands[2] = GEN_INT (offset);
3985 return "xxextractuw %x0,%x1,%2";
3987 return "vextractu<wd> %0,%1,%2";
3990 [(set_attr "type" "vecsimple")
3991 (set_attr "isa" "p9v,*")])
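;; As a hedged illustration of the ISA 3.0 path (assuming <altivec.h> and
;; -mcpu=power9), a constant-index extract such as:
;;   unsigned char get5 (vector unsigned char v)
;;   {
;;     return vec_extract (v, 5);
;;   }
;; maps to this pattern.  A vector-register destination uses vextractub with
;; the byte offset computed above, while a GPR destination is split (just
;; below) into the indexed vextublx/vextubrx form.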
3994 [(set (match_operand:<VEC_base> 0 "int_reg_operand")
3995 (vec_select:<VEC_base>
3996 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3997 (parallel [(match_operand:QI 2 "const_int_operand")])))
3998 (clobber (match_operand:SI 3 "int_reg_operand"))]
3999 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
4002 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
4003 rtx op1 = operands[1];
4004 rtx op2 = operands[2];
4005 rtx op3 = operands[3];
4006 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
4008 emit_move_insn (op3, GEN_INT (offset));
4009 if (BYTES_BIG_ENDIAN)
4010 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
4012 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
4016 ;; Optimize zero extracts to eliminate the AND after the extract.
4017 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
4018 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
4020 (vec_select:<VEC_base>
4021 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
4022 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
4023 (clobber (match_scratch:SI 3 "=r,X"))]
4024 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
4026 "&& reload_completed"
4027 [(parallel [(set (match_dup 4)
4028 (vec_select:<VEC_base>
4030 (parallel [(match_dup 2)])))
4031 (clobber (match_dup 3))])]
4033 gcc_assert (<MODE>mode != V4SImode
4034 || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2));
4036 operands[4] = gen_rtx_REG (<VEC_base>mode, REGNO (operands[0]));
4038 [(set_attr "isa" "p9v,*")])
4040 ;; Optimize stores to use the ISA 3.0 scalar store instructions
4041 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
4042 [(set (match_operand:<VEC_base> 0 "memory_operand" "=Z,m")
4043 (vec_select:<VEC_base>
4044 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
4045 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
4046 (clobber (match_scratch:<VEC_base> 3 "=<VSX_EX>,&*r"))
4047 (clobber (match_scratch:SI 4 "=X,&r"))]
4048 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
4050 "&& reload_completed"
4051 [(parallel [(set (match_dup 3)
4052 (vec_select:<VEC_base>
4054 (parallel [(match_dup 2)])))
4055 (clobber (match_dup 4))])
4059 if (which_alternative == 0
4060 && ((<MODE>mode == V16QImode
4061 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 7 : 8))
4062 || (<MODE>mode == V8HImode
4063 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 3 : 4))))
4065 enum machine_mode dest_mode = GET_MODE (operands[0]);
4066 emit_move_insn (operands[0],
4067 gen_rtx_REG (dest_mode, REGNO (operands[3])));
4073 ;; Extract from word 0, 2, 3 (BE order).
4074 (define_insn_and_split "*vsx_extract_v4si_w023"
4075 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
4077 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
4078 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
4079 (clobber (match_scratch:V4SI 3 "=v,v,v"))]
4080 "TARGET_DIRECT_MOVE_64BIT"
4082 "&& INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)"
4085 gcc_assert (!TARGET_P9_VECTOR);
4087 rtx dest = operands[0];
4088 rtx src = operands[1];
4089 rtx element = operands[2];
4092 if (GET_CODE (operands[3]) == SCRATCH)
4093 vec_tmp = gen_reg_rtx (V4SImode);
4095 vec_tmp = operands[3];
4097 /* Adjust the index for LE element ordering; the minuend 3 below is
4098 GET_MODE_NUNITS (V4SImode) - 1. */
4099 if (!BYTES_BIG_ENDIAN)
4100 element = GEN_INT (3 - INTVAL (element));
4102 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
4104 int value = BYTES_BIG_ENDIAN ? 1 : 2;
4105 emit_insn (gen_vsx_extract_v4si_w1 (dest, vec_tmp, GEN_INT (value)));
4110 (define_insn_and_split "*vsx_extract_<mode>_p8"
4111 [(set (match_operand:<VEC_base> 0 "nonimmediate_operand" "=r")
4112 (vec_select:<VEC_base>
4113 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
4114 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
4115 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
4116 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4117 && !TARGET_P9_VECTOR"
4119 "&& reload_completed"
4122 rtx dest = operands[0];
4123 rtx src = operands[1];
4124 rtx element = operands[2];
4125 rtx vec_tmp = operands[3];
4128 if (!BYTES_BIG_ENDIAN)
4129 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
4131 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
4133 value = INTVAL (element);
4134 if (<MODE>mode == V16QImode)
4137 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
4141 else if (<MODE>mode == V8HImode)
4144 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
4151 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
4152 gen_rtx_REG (DImode, REGNO (vec_tmp)));
4155 [(set_attr "type" "mfvsr")])
4157 ;; Optimize extracting a single scalar element from memory.
4158 (define_insn_and_split "*vsx_extract_<mode>_load"
4159 [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
4160 (vec_select:<VEC_base>
4161 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
4162 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
4163 (clobber (match_scratch:DI 3 "=&b"))]
4164 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4166 "&& reload_completed"
4167 [(set (match_dup 0) (match_dup 4))]
4169 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
4170 operands[3], <VEC_base>mode);
4172 [(set_attr "type" "load")
4173 (set_attr "length" "8")])
4175 ;; Variable V16QI/V8HI/V4SI extract from a register
4176 (define_insn_and_split "vsx_extract_<mode>_var"
4177 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r,r")
4179 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v")
4180 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
4181 UNSPEC_VSX_EXTRACT))
4182 (clobber (match_scratch:DI 3 "=r,r"))
4183 (clobber (match_scratch:V2DI 4 "=X,&v"))]
4184 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4186 "&& reload_completed"
4189 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
4190 operands[3], operands[4]);
4193 [(set_attr "isa" "p9v,*")])
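;; A hedged example of the variable-index case (assuming <altivec.h>):
;;   unsigned char pick (vector unsigned char v, int i)
;;   {
;;     return vec_extract (v, i);   /* i is not a compile-time constant */
;;   }
;; rs6000_split_vec_extract_var then emits the runtime sequence that selects
;; the element; the exact code depends on the ISA level and register class.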
4195 ;; Variable V16QI/V8HI/V4SI extract from memory
4196 (define_insn_and_split "*vsx_extract_<mode>_var_load"
4197 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r")
4199 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q")
4200 (match_operand:DI 2 "gpc_reg_operand" "r")]
4201 UNSPEC_VSX_EXTRACT))
4202 (clobber (match_scratch:DI 3 "=&b"))]
4203 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4205 "&& reload_completed"
4206 [(set (match_dup 0) (match_dup 4))]
4208 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
4209 operands[3], <VEC_base>mode);
4211 [(set_attr "type" "load")])
4214 (define_expand "vextractl<mode>"
4215 [(set (match_operand:V2DI 0 "altivec_register_operand")
4216 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
4217 (match_operand:VI2 2 "altivec_register_operand")
4218 (match_operand:SI 3 "register_operand")]
4222 if (BYTES_BIG_ENDIAN)
4224 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[1],
4225 operands[2], operands[3]));
4226 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
4229 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[2],
4230 operands[1], operands[3]));
4234 (define_insn "vextractl<mode>_internal"
4235 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
4236 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
4237 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4238 (match_operand:SI 3 "register_operand" "r")]
4241 "vext<du_or_d><wd>vlx %0,%1,%2,%3"
4242 [(set_attr "type" "vecsimple")])
4244 (define_expand "vextractr<mode>"
4245 [(set (match_operand:V2DI 0 "altivec_register_operand")
4246 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
4247 (match_operand:VI2 2 "altivec_register_operand")
4248 (match_operand:SI 3 "register_operand")]
4252 if (BYTES_BIG_ENDIAN)
4254 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[1],
4255 operands[2], operands[3]));
4256 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
4259 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[2],
4260 operands[1], operands[3]));
4264 (define_insn "vextractr<mode>_internal"
4265 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
4266 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
4267 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4268 (match_operand:SI 3 "register_operand" "r")]
4271 "vext<du_or_d><wd>vrx %0,%1,%2,%3"
4272 [(set_attr "type" "vecsimple")])
4274 (define_expand "vinsertvl_<mode>"
4275 [(set (match_operand:VI2 0 "altivec_register_operand")
4276 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
4277 (match_operand:VI2 2 "altivec_register_operand")
4278 (match_operand:SI 3 "register_operand" "r")]
4282 if (BYTES_BIG_ENDIAN)
4283 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
4284 operands[1], operands[2]));
4286 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
4287 operands[1], operands[2]));
4291 (define_insn "vinsertvl_internal_<mode>"
4292 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4293 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4294 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4295 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4298 "vins<wd>vlx %0,%1,%2"
4299 [(set_attr "type" "vecsimple")])
4301 (define_expand "vinsertvr_<mode>"
4302 [(set (match_operand:VI2 0 "altivec_register_operand")
4303 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
4304 (match_operand:VI2 2 "altivec_register_operand")
4305 (match_operand:SI 3 "register_operand" "r")]
4309 if (BYTES_BIG_ENDIAN)
4310 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
4311 operands[1], operands[2]));
4313 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
4314 operands[1], operands[2]));
4318 (define_insn "vinsertvr_internal_<mode>"
4319 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4320 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4321 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4322 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4325 "vins<wd>vrx %0,%1,%2"
4326 [(set_attr "type" "vecsimple")])
4328 (define_expand "vinsertgl_<mode>"
4329 [(set (match_operand:VI2 0 "altivec_register_operand")
4330 (unspec:VI2 [(match_operand:SI 1 "register_operand")
4331 (match_operand:VI2 2 "altivec_register_operand")
4332 (match_operand:SI 3 "register_operand")]
4336 if (BYTES_BIG_ENDIAN)
4337 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
4338 operands[1], operands[2]));
4340 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
4341 operands[1], operands[2]));
4345 (define_insn "vinsertgl_internal_<mode>"
4346 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4347 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4348 (match_operand:SI 2 "register_operand" "r")
4349 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4352 "vins<wd>lx %0,%1,%2"
4353 [(set_attr "type" "vecsimple")])
4355 (define_expand "vinsertgr_<mode>"
4356 [(set (match_operand:VI2 0 "altivec_register_operand")
4357 (unspec:VI2 [(match_operand:SI 1 "register_operand")
4358 (match_operand:VI2 2 "altivec_register_operand")
4359 (match_operand:SI 3 "register_operand")]
4363 if (BYTES_BIG_ENDIAN)
4364 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
4365 operands[1], operands[2]));
4367 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
4368 operands[1], operands[2]));
4372 (define_insn "vinsertgr_internal_<mode>"
4373 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4374 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4375 (match_operand:SI 2 "register_operand" "r")
4376 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4379 "vins<wd>rx %0,%1,%2"
4380 [(set_attr "type" "vecsimple")])
4382 (define_expand "vreplace_elt_<mode>"
4383 [(set (match_operand:REPLACE_ELT_V 0 "register_operand")
4384 (unspec:REPLACE_ELT_V [(match_operand:REPLACE_ELT_V 1 "register_operand")
4385 (match_operand:<VEC_base> 2 "register_operand")
4386 (match_operand:QI 3 "const_0_to_3_operand")]
4387 UNSPEC_REPLACE_ELT))]
4391 /* The immediate value is the word index; convert it to a byte index and
4392 adjust for endianness if needed. */
4393 if (BYTES_BIG_ENDIAN)
4394 index = INTVAL (operands[3]) << <REPLACE_ELT_sh>;
4397 index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>);
4399 emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
4404 [(set_attr "type" "vecsimple")])
4406 (define_insn "vreplace_elt_<mode>_inst"
4407 [(set (match_operand:REPLACE_ELT_V 0 "register_operand" "=v")
4408 (unspec:REPLACE_ELT_V [(match_operand:REPLACE_ELT_V 1 "register_operand" "0")
4409 (match_operand:<VEC_base> 2 "register_operand" "r")
4410 (match_operand:QI 3 "const_0_to_12_operand" "n")]
4411 UNSPEC_REPLACE_ELT))]
4413 "vins<REPLACE_ELT_char> %0,%2,%3"
4414 [(set_attr "type" "vecsimple")])
4416 (define_insn "vreplace_un_<mode>"
4417 [(set (match_operand:V16QI 0 "register_operand" "=v")
4418 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
4419 (match_operand:REPLACE_ELT 2 "register_operand" "r")
4420 (match_operand:QI 3 "const_0_to_12_operand" "n")]
4421 UNSPEC_REPLACE_UN))]
4423 "vins<REPLACE_ELT_char> %0,%2,%3"
4424 [(set_attr "type" "vecsimple")])
4426 ;; VSX_EXTRACT optimizations
4427 ;; Optimize double d = (double) vec_extract (vi, <n>)
4428 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
4429 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
4430 [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
4433 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4434 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4435 (clobber (match_scratch:V4SI 3 "=v"))]
4436 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4441 rtx dest = operands[0];
4442 rtx src = operands[1];
4443 rtx element = operands[2];
4444 rtx v4si_tmp = operands[3];
4447 /* Adjust the index for LE element ordering; the minuend 3 below is
4448 GET_MODE_NUNITS (V4SImode) - 1. */
4449 if (!BYTES_BIG_ENDIAN)
4450 element = GEN_INT (3 - INTVAL (element));
4452 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
4454 value = INTVAL (element);
4457 if (GET_CODE (v4si_tmp) == SCRATCH)
4458 v4si_tmp = gen_reg_rtx (V4SImode);
4459 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4464 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
4468 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
4469 ;; where <type> is a floating point type that is supported by the hardware and is
4470 ;; not double. First convert the value to double, and then to the desired
4472 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
4473 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
4474 (any_float:VSX_EXTRACT_FL
4476 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4477 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4478 (clobber (match_scratch:V4SI 3 "=v"))
4479 (clobber (match_scratch:DF 4 "=wa"))]
4480 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4485 rtx dest = operands[0];
4486 rtx src = operands[1];
4487 rtx element = operands[2];
4488 rtx v4si_tmp = operands[3];
4489 rtx df_tmp = operands[4];
4492 /* Adjust the index for LE element ordering; the minuend 3 below is
4493 GET_MODE_NUNITS (V4SImode) - 1. */
4494 if (!BYTES_BIG_ENDIAN)
4495 element = GEN_INT (3 - INTVAL (element));
4497 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
4499 value = INTVAL (element);
4502 if (GET_CODE (v4si_tmp) == SCRATCH)
4503 v4si_tmp = gen_reg_rtx (V4SImode);
4504 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4509 if (GET_CODE (df_tmp) == SCRATCH)
4510 df_tmp = gen_reg_rtx (DFmode);
4512 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
4514 if (<MODE>mode == SFmode)
4515 emit_insn (gen_truncdfsf2 (dest, df_tmp));
4516 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
4517 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
4518 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
4519 && TARGET_FLOAT128_HW)
4520 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
4521 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
4522 emit_insn (gen_extenddfif2 (dest, df_tmp));
4523 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
4524 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
4531 ;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>)
4532 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
4533 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
4534 ;; vector short or vector unsigned short.
4535 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VEC_base>_fl_<FL_CONV:mode>"
4536 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4538 (vec_select:<VSX_EXTRACT_I:VEC_base>
4539 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4540 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4541 (clobber (match_scratch:<VSX_EXTRACT_I:VEC_base> 3 "=v"))]
4542 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4543 && TARGET_P9_VECTOR"
4545 "&& reload_completed"
4546 [(parallel [(set (match_dup 3)
4547 (vec_select:<VSX_EXTRACT_I:VEC_base>
4549 (parallel [(match_dup 2)])))
4550 (clobber (scratch:SI))])
4552 (sign_extend:DI (match_dup 3)))
4554 (float:<FL_CONV:MODE> (match_dup 4)))]
4556 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4558 [(set_attr "isa" "<FL_CONV:VSisa>")])
4560 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VEC_base>_ufl_<FL_CONV:mode>"
4561 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4562 (unsigned_float:FL_CONV
4563 (vec_select:<VSX_EXTRACT_I:VEC_base>
4564 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4565 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4566 (clobber (match_scratch:<VSX_EXTRACT_I:VEC_base> 3 "=v"))]
4567 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4568 && TARGET_P9_VECTOR"
4570 "&& reload_completed"
4571 [(parallel [(set (match_dup 3)
4572 (vec_select:<VSX_EXTRACT_I:VEC_base>
4574 (parallel [(match_dup 2)])))
4575 (clobber (scratch:SI))])
4577 (float:<FL_CONV:MODE> (match_dup 4)))]
4579 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4581 [(set_attr "isa" "<FL_CONV:VSisa>")])
4583 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
4584 (define_insn "vsx_set_<mode>_p9"
4585 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
4586 (unspec:VSX_EXTRACT_I
4587 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
4588 (match_operand:<VEC_base> 2 "gpc_reg_operand" "<VSX_EX>")
4589 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
4591 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4593 int ele = INTVAL (operands[3]);
4594 int nunits = GET_MODE_NUNITS (<MODE>mode);
4596 if (!BYTES_BIG_ENDIAN)
4597 ele = nunits - 1 - ele;
4599 operands[3] = GEN_INT (GET_MODE_SIZE (<VEC_base>mode) * ele);
4600 if (<MODE>mode == V4SImode)
4601 return "xxinsertw %x0,%x2,%3";
4603 return "vinsert<wd> %0,%2,%3";
4605 [(set_attr "type" "vecperm")])
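;; A hedged illustration (assuming <altivec.h> and -mcpu=power9):
;;   vector int set2 (vector int v, int x)
;;   {
;;     return vec_insert (x, v, 2);
;;   }
;; can use xxinsertw with the byte offset computed above: element 2 becomes
;; byte offset 8 on big endian, or byte offset 4 after the little-endian
;; adjustment.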
4607 (define_insn_and_split "vsx_set_v4sf_p9"
4608 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4610 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4611 (match_operand:SF 2 "gpc_reg_operand" "wa")
4612 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4614 (clobber (match_scratch:SI 4 "=&wa"))]
4615 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4617 "&& reload_completed"
4619 (unspec:V4SF [(match_dup 2)]
4620 UNSPEC_VSX_CVDPSPN))
4621 (parallel [(set (match_dup 4)
4622 (vec_select:SI (match_dup 6)
4623 (parallel [(match_dup 7)])))
4624 (clobber (scratch:SI))])
4626 (unspec:V4SI [(match_dup 8)
4631 unsigned int tmp_regno = reg_or_subregno (operands[4]);
4633 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
4634 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
4635 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
4636 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4638 [(set_attr "type" "vecperm")
4639 (set_attr "length" "12")
4640 (set_attr "isa" "p9v")])
4642 ;; Special case setting 0.0f to a V4SF element
4643 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
4644 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4646 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4647 (match_operand:SF 2 "zero_fp_constant" "j")
4648 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4650 (clobber (match_scratch:SI 4 "=&wa"))]
4651 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4653 "&& reload_completed"
4657 (unspec:V4SI [(match_dup 5)
4662 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4664 [(set_attr "type" "vecperm")
4665 (set_attr "length" "8")
4666 (set_attr "isa" "p9v")])
4668 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4669 ;; that is in the default scalar position (1 for big endian, 2 for little
4670 ;; endian). We just need to do an xxinsertw since the element is in the
4671 ;; correct location.
4673 (define_insn "*vsx_insert_extract_v4sf_p9"
4674 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4676 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4677 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4679 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4680 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4682 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4683 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
4685 int ele = INTVAL (operands[4]);
4687 /* Adjust the index for LE element ordering; the minuend 3 below is
4688 GET_MODE_NUNITS (V4SFmode) - 1. */
4689 if (!BYTES_BIG_ENDIAN)
4692 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4693 return "xxinsertw %x0,%x2,%4";
4695 [(set_attr "type" "vecperm")])
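;; For example (a sketch, not taken from the sources), on a big-endian
;; ISA 3.0 target:
;;   vector float copy_elt (vector float a, vector float b)
;;   {
;;     return vec_insert (vec_extract (b, 1), a, 3);
;;   }
;; extracts from the default scalar position (element 1 on BE, element 2 on
;; LE), so the insert is a single xxinsertw with no float/int conversion.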
4697 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4698 ;; that is in the default scalar position (1 for big endian, 2 for little
4699 ;; endian). Do the insert/extract on integers so the float conversion is avoided.
4701 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4702 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4704 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4705 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4707 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4708 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4710 (clobber (match_scratch:SI 5 "=&wa"))]
4711 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4712 && TARGET_P9_VECTOR && TARGET_POWERPC64
4713 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
4716 [(parallel [(set (match_dup 5)
4717 (vec_select:SI (match_dup 6)
4718 (parallel [(match_dup 3)])))
4719 (clobber (scratch:SI))])
4721 (unspec:V4SI [(match_dup 8)
4726 if (GET_CODE (operands[5]) == SCRATCH)
4727 operands[5] = gen_reg_rtx (SImode);
4729 operands[6] = gen_lowpart (V4SImode, operands[2]);
4730 operands[7] = gen_lowpart (V4SImode, operands[0]);
4731 operands[8] = gen_lowpart (V4SImode, operands[1]);
4733 [(set_attr "type" "vecperm")
4734 (set_attr "isa" "p9v")])
4736 ;; Expanders for builtins
4737 (define_expand "vsx_mergel_<mode>"
4738 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4739 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4740 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4741 "VECTOR_MEM_VSX_P (<MODE>mode)"
4743 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4744 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4745 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4746 emit_insn (gen_rtx_SET (operands[0], x));
4750 (define_expand "vsx_mergeh_<mode>"
4751 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4752 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4753 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4754 "VECTOR_MEM_VSX_P (<MODE>mode)"
4756 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4757 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4758 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4759 emit_insn (gen_rtx_SET (operands[0], x));
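;; As a hedged illustration of what these expanders implement (assuming
;; <altivec.h>):
;;   vector double hi_parts (vector double a, vector double b)
;;   {
;;     return vec_mergeh (a, b);   /* doublewords {a[0], b[0]} */
;;   }
;;   vector double lo_parts (vector double a, vector double b)
;;   {
;;     return vec_mergel (a, b);   /* doublewords {a[1], b[1]} */
;;   }
;; i.e. elements {0, 2} and {1, 3} of the concatenation of the two inputs,
;; matching the selectors built above.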
4764 ;; We separate the register splat insn from the memory splat insn to force the
4765 ;; register allocator to generate the indexed form of the SPLAT when it is
4766 ;; given an offsettable memory reference. Otherwise, if the register and
4767 ;; memory insns were combined into a single insn, the register allocator would
4768 ;; load the value into a register and then do a doubleword permute.
4769 (define_expand "vsx_splat_<mode>"
4770 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4771 (vec_duplicate:VSX_D
4772 (match_operand:<VEC_base> 1 "input_operand")))]
4773 "VECTOR_MEM_VSX_P (<MODE>mode)"
4775 rtx op1 = operands[1];
4777 operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
4779 operands[1] = force_reg (<VSX_D:VEC_base>mode, op1);
4782 (define_insn "vsx_splat_<mode>_reg"
4783 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
4784 (vec_duplicate:VSX_D
4785 (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa,b")))]
4786 "VECTOR_MEM_VSX_P (<MODE>mode)"
4788 xxpermdi %x0,%x1,%x1,0
4790 [(set_attr "type" "vecperm,vecmove")])
4792 (define_insn "vsx_splat_<mode>_mem"
4793 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4794 (vec_duplicate:VSX_D
4795 (match_operand:<VSX_D:VEC_base> 1 "memory_operand" "Z")))]
4796 "VECTOR_MEM_VSX_P (<MODE>mode)"
4798 [(set_attr "type" "vecload")])
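;; A hedged usage sketch (assuming <altivec.h>):
;;   vector double dup_reg (double d)        { return vec_splats (d); }
;;   vector double dup_mem (const double *p) { return vec_splats (*p); }
;; A register source matches vsx_splat_<mode>_reg above, while a memory source
;; is first forced to an indexed or indirect address so vsx_splat_<mode>_mem
;; can use the indexed splat load, per the comment preceding the expander.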
4800 ;; V4SI splat support
4801 (define_insn "vsx_splat_v4si"
4802 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
4804 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4809 [(set_attr "type" "vecperm,vecload")])
4811 ;; SImode is not currently allowed in vector registers. This pattern
4812 ;; allows us to use direct move to get the value in a vector register
4813 ;; so that we can use XXSPLTW
4814 (define_insn "vsx_splat_v4si_di"
4815 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4818 (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
4819 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4823 [(set_attr "type" "vecperm")
4824 (set_attr "isa" "p8v,*")])
4826 ;; V4SF splat (ISA 3.0)
4827 (define_insn_and_split "vsx_splat_v4sf"
4828 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4830 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
4836 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4838 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4840 (unspec:V4SF [(match_dup 0)
4841 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4843 [(set_attr "type" "vecload,vecperm,vecperm")
4844 (set_attr "length" "*,8,*")
4845 (set_attr "isa" "*,p8v,*")])
4847 ;; V4SF/V4SI splat from a vector element
4848 (define_insn "vsx_xxspltw_<mode>"
4849 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4850 (vec_duplicate:VSX_W
4851 (vec_select:<VEC_base>
4852 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4854 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4855 "VECTOR_MEM_VSX_P (<MODE>mode)"
4857 if (!BYTES_BIG_ENDIAN)
4858 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4860 return "xxspltw %x0,%x1,%2";
4862 [(set_attr "type" "vecperm")])
4864 (define_insn "vsx_xxspltw_<mode>_direct"
4865 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4866 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
4867 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4868 UNSPEC_VSX_XXSPLTW))]
4869 "VECTOR_MEM_VSX_P (<MODE>mode)"
4870 "xxspltw %x0,%x1,%2"
4871 [(set_attr "type" "vecperm")])
4873 ;; V16QI/V8HI splat support on ISA 2.07
4874 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4875 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4876 (vec_duplicate:VSX_SPLAT_I
4877 (truncate:<VEC_base>
4878 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4879 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4880 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4881 [(set_attr "type" "vecperm")])
4883 ;; V2DF/V2DI splat for use by vec_splat builtin
4884 (define_insn "vsx_xxspltd_<mode>"
4885 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4886 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4887 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4888 UNSPEC_VSX_XXSPLTD))]
4889 "VECTOR_MEM_VSX_P (<MODE>mode)"
4891 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4892 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4893 return "xxpermdi %x0,%x1,%x1,0";
4895 return "xxpermdi %x0,%x1,%x1,3";
4897 [(set_attr "type" "vecperm")])
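;; For instance (illustrative only), vec_splat (vd, 1) on a vector double
;; duplicates doubleword element 1 into both result elements; per the code
;; above, the element number is taken in the vector's natural order, so the
;; xxpermdi immediate is 3 on big endian and 0 on little endian.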
4899 ;; Shift left double by word immediate
4900 (define_insn "vsx_xxsldwi_<mode>"
4901 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
4902 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
4903 (match_operand:VSX_L 2 "vsx_register_operand" "wa")
4904 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4906 "VECTOR_MEM_VSX_P (<MODE>mode)"
4907 "xxsldwi %x0,%x1,%x2,%3"
4908 [(set_attr "type" "vecperm")
4909 (set_attr "isa" "<VSisa>")])
4912 ;; Vector reduction insns and splitters
4914 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4915 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
4919 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4920 (parallel [(const_int 1)]))
4923 (parallel [(const_int 0)])))
4925 (clobber (match_scratch:V2DF 2 "=0,&wa"))]
4926 "VECTOR_UNIT_VSX_P (V2DFmode)"
4931 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4932 ? gen_reg_rtx (V2DFmode)
4934 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4935 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4938 [(set_attr "length" "8")
4939 (set_attr "type" "veccomplex")])
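;; A worked sketch of the split above, assuming the reduction operation is
;; plus and v = {v0, v1}:
;;   tmp    = xxsldwi (v, v, 2)   --> {v1, v0}  (doublewords swapped)
;;   result = tmp + v             --> {v0 + v1, v0 + v1}
;; so the reduced value ends up in both elements of the result vector.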
4941 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4942 [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
4944 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4945 (match_operand:V4SF 1 "vfloat_operand" "wa")))
4946 (clobber (match_scratch:V4SF 2 "=&wa"))
4947 (clobber (match_scratch:V4SF 3 "=&wa"))]
4948 "VECTOR_UNIT_VSX_P (V4SFmode)"
4953 rtx op0 = operands[0];
4954 rtx op1 = operands[1];
4955 rtx tmp2, tmp3, tmp4;
4957 if (can_create_pseudo_p ())
4959 tmp2 = gen_reg_rtx (V4SFmode);
4960 tmp3 = gen_reg_rtx (V4SFmode);
4961 tmp4 = gen_reg_rtx (V4SFmode);
4970 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4971 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4972 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4973 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4976 [(set_attr "length" "16")
4977 (set_attr "type" "veccomplex")])
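;; A worked sketch, again assuming the reduction operation is plus and
;; v = {v0, v1, v2, v3}:
;;   tmp2 = xxsldwi (v, v, 2)        --> {v2, v3, v0, v1}
;;   tmp3 = tmp2 + v                 --> {v0+v2, v1+v3, v0+v2, v1+v3}
;;   tmp4 = xxsldwi (tmp3, tmp3, 3)  --> {v1+v3, v0+v2, v1+v3, v0+v2}
;;   op0  = tmp4 + tmp3              --> every element is v0+v1+v2+v3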
4979 ;; Combiner patterns with the vector reduction patterns that know we can get
4980 ;; to the top element of the V2DF array without doing an extract.
4982 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4983 [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
4988 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4989 (parallel [(const_int 1)]))
4992 (parallel [(const_int 0)])))
4994 (parallel [(const_int 1)])))
4995 (clobber (match_scratch:DF 2 "=0,&wa"))]
4996 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
5001 rtx hi = gen_highpart (DFmode, operands[1]);
5002 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
5003 ? gen_reg_rtx (DFmode)
5006 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
5007 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
5010 [(set_attr "length" "8")
5011 (set_attr "type" "veccomplex")])
5013 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
5014 [(set (match_operand:SF 0 "vfloat_operand" "=f")
5017 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
5018 (match_operand:V4SF 1 "vfloat_operand" "wa"))
5019 (parallel [(const_int 3)])))
5020 (clobber (match_scratch:V4SF 2 "=&wa"))
5021 (clobber (match_scratch:V4SF 3 "=&wa"))
5022 (clobber (match_scratch:V4SF 4 "=0"))]
5023 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
5028 rtx op0 = operands[0];
5029 rtx op1 = operands[1];
5030 rtx tmp2, tmp3, tmp4, tmp5;
5032 if (can_create_pseudo_p ())
5034 tmp2 = gen_reg_rtx (V4SFmode);
5035 tmp3 = gen_reg_rtx (V4SFmode);
5036 tmp4 = gen_reg_rtx (V4SFmode);
5037 tmp5 = gen_reg_rtx (V4SFmode);
5047 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
5048 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
5049 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
5050 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
5051 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
5054 [(set_attr "length" "20")
5055 (set_attr "type" "veccomplex")])
5058 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
5060 [(set (match_operand:P 0 "base_reg_operand")
5061 (match_operand:P 1 "short_cint_operand"))
5062 (set (match_operand:VSX_M 2 "vsx_register_operand")
5063 (mem:VSX_M (plus:P (match_dup 0)
5064 (match_operand:P 3 "int_reg_operand"))))]
5065 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
5066 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
5067 [(set_attr "length" "8")
5068 (set_attr "type" "vecload")])
5071 [(set (match_operand:P 0 "base_reg_operand")
5072 (match_operand:P 1 "short_cint_operand"))
5073 (set (match_operand:VSX_M 2 "vsx_register_operand")
5074 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
5076 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
5077 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
5078 [(set_attr "length" "8")
5079 (set_attr "type" "vecload")])
5082 ;; ISA 3.1 vector extend sign support
5083 (define_insn "vsx_sign_extend_v2di_v1ti"
5084 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
5085 (unspec:V1TI [(match_operand:V2DI 1 "vsx_register_operand" "v")]
5086 UNSPEC_VSX_SIGN_EXTEND))]
5089 [(set_attr "type" "vecexts")])
5091 ;; ISA 3.0 vector extend sign support
5093 (define_insn "vsx_sign_extend_v16qi_<mode>"
5094 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
5096 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
5097 UNSPEC_VSX_SIGN_EXTEND))]
5100 [(set_attr "type" "vecexts")])
5102 (define_insn "vsx_sign_extend_v8hi_<mode>"
5103 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
5105 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
5106 UNSPEC_VSX_SIGN_EXTEND))]
5109 [(set_attr "type" "vecexts")])
5111 (define_insn "vsx_sign_extend_v4si_v2di"
5112 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
5113 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
5114 UNSPEC_VSX_SIGN_EXTEND))]
5117 [(set_attr "type" "vecexts")])
5119 ;; Sign extend DI to TI. We provide both GPR targets and Altivec targets on
5120 ;; power10. On earlier systems, the machine independent code will generate a
5121 ;; shift left to sign extend the 64-bit value to 128-bit.
5123 ;; If the register allocator prefers to use GPR registers, we will use a shift
5124 ;; left instruction to sign extend the 64-bit value to 128-bit.
5126 ;; If the register allocator prefers to use Altivec registers on power10,
5127 ;; generate the vextsd2q instruction.
5128 (define_insn_and_split "extendditi2"
5129 [(set (match_operand:TI 0 "register_operand" "=r,r,v,v,v")
5130 (sign_extend:TI (match_operand:DI 1 "input_operand" "r,m,b,wa,Z")))
5131 (clobber (reg:DI CA_REGNO))]
5132 "TARGET_POWERPC64 && TARGET_POWER10"
5134 "&& reload_completed"
5137 rtx dest = operands[0];
5138 rtx src = operands[1];
5139 int dest_regno = reg_or_subregno (dest);
5141 /* Handle conversion to GPR registers. Load up the low part and then do
5142 a sign extension to the upper part. */
5143 if (INT_REGNO_P (dest_regno))
5145 rtx dest_hi = gen_highpart (DImode, dest);
5146 rtx dest_lo = gen_lowpart (DImode, dest);
5148 emit_move_insn (dest_lo, src);
5149 /* In case src is a MEM, we have to use the destination, which is a
5150 register, instead of re-using the source. */
5151 rtx src2 = (REG_P (src) || SUBREG_P (src)) ? src : dest_lo;
5152 emit_insn (gen_ashrdi3 (dest_hi, src2, GEN_INT (63)));
5156 /* For conversion to an Altivec register, generate either a splat operation
5157 or a load rightmost doubleword instruction. Both instructions get the
5158 DImode value into the lower 64 bits, and then do the vextsd2q
5161 else if (ALTIVEC_REGNO_P (dest_regno))
5164 emit_insn (gen_vsx_lxvrdx (dest, src));
5167 rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
5168 emit_insn (gen_vsx_splat_v2di (dest_v2di, src));
5171 emit_insn (gen_extendditi2_vector (dest, dest));
5178 [(set_attr "length" "8")
5179 (set_attr "type" "shift,load,vecmove,vecperm,load")])
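;; As a hedged illustration of the two register classes handled above:
;;   __int128 ext (long long x)
;;   {
;;     return (__int128) x;
;;   }
;; With a GPR destination the split copies the low doubleword and uses an
;; arithmetic shift right by 63 to produce the high doubleword; with an
;; Altivec destination on power10 the value is placed in the low 64 bits of
;; the vector register (lxvrdx or a doubleword splat) and vextsd2q finishes
;; the sign extension.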
5181 ;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg
5182 (define_insn "extendditi2_vector"
5183 [(set (match_operand:TI 0 "gpc_reg_operand" "=v")
5184 (unspec:TI [(match_operand:TI 1 "gpc_reg_operand" "v")]
5185 UNSPEC_EXTENDDITI2))]
5188 [(set_attr "type" "vecexts")])
5191 ;; ISA 3.0 Binary Floating-Point Support
5193 ;; VSX Scalar Extract Exponent Quad-Precision
5194 (define_insn "xsxexpqp_<IEEE128:mode>_<V2DI_DI:mode>"
5195 [(set (match_operand:V2DI_DI 0 "altivec_register_operand" "=v")
5197 [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
5198 UNSPEC_VSX_SXEXPDP))]
5201 [(set_attr "type" "vecmove")])
5203 ;; VSX Scalar Extract Exponent Double-Precision
5204 (define_insn "xsxexpdp_<mode>"
5205 [(set (match_operand:GPR 0 "register_operand" "=r")
5206 (unspec:GPR [(match_operand:DF 1 "vsx_register_operand" "wa")]
5207 UNSPEC_VSX_SXEXPDP))]
5210 [(set_attr "type" "integer")])
5212 ;; VSX Scalar Extract Significand Quad-Precision
5213 (define_insn "xsxsigqp_<IEEE128:mode>_<VEC_TI:mode>"
5214 [(set (match_operand:VEC_TI 0 "altivec_register_operand" "=v")
5215 (unspec:VEC_TI [(match_operand:IEEE128 1
5216 "altivec_register_operand" "v")]
5220 [(set_attr "type" "vecmove")])
5222 ;; VSX Scalar Extract Significand Double-Precision
5223 (define_insn "xsxsigdp"
5224 [(set (match_operand:DI 0 "register_operand" "=r")
5225 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
5227 "TARGET_P9_VECTOR && TARGET_POWERPC64"
5229 [(set_attr "type" "integer")])
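;; A hedged usage sketch for these extract patterns; the built-in names and
;; prototypes follow the GCC PowerPC built-in documentation and should be
;; treated as assumptions here:
;;   unsigned int       exp_bits (double d) { return scalar_extract_exp (d); }
;;   unsigned long long sig_bits (double d) { return scalar_extract_sig (d); }
;; The quad-precision forms take an __ieee128 argument and return their
;; result in a vector register (xsxexpqp / xsxsigqp).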
5231 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
5232 (define_insn "xsiexpqpf_<mode>"
5233 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
5235 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5236 (match_operand:DI 2 "altivec_register_operand" "v")]
5237 UNSPEC_VSX_SIEXPQP))]
5240 [(set_attr "type" "vecmove")])
5242 ;; VSX Scalar Insert Exponent Quad-Precision
5243 (define_insn "xsiexpqp_<IEEE128:mode>_<V2DI_DI:mode>"
5244 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
5245 (unspec:IEEE128 [(match_operand:<DI_to_TI> 1
5246 "altivec_register_operand" "v")
5247 (match_operand:V2DI_DI 2
5248 "altivec_register_operand" "v")]
5249 UNSPEC_VSX_SIEXPQP))]
5252 [(set_attr "type" "vecmove")])
5254 ;; VSX Scalar Insert Exponent Double-Precision
5255 (define_insn "xsiexpdp_<mode>"
5256 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
5257 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
5258 (match_operand:GPR 2 "register_operand" "r")]
5259 UNSPEC_VSX_SIEXPDP))]
5260 "TARGET_P9_VECTOR && TARGET_POWERPC64"
5261 "xsiexpdp %x0,%1,%2"
5262 [(set_attr "type" "fpsimple")])
5264 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
5265 (define_insn "xsiexpdpf_<mode>"
5266 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
5267 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
5268 (match_operand:GPR 2 "register_operand" "r")]
5269 UNSPEC_VSX_SIEXPDP))]
5270 "TARGET_P9_VECTOR && TARGET_POWERPC64"
5271 "xsiexpdp %x0,%1,%2"
5272 [(set_attr "type" "fpsimple")])
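;; A hedged usage sketch (built-in name and overloads per the GCC PowerPC
;; documentation, treated here as assumptions):
;;   double rebuild (unsigned long long sig, unsigned long long exp)
;;   {
;;     return scalar_insert_exp (sig, exp);   /* xsiexpdp */
;;   }
;; The overload whose first argument is a double reuses that value's
;; significand instead of a raw 64-bit pattern (the xsiexpdpf pattern above).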
5274 ;; VSX Scalar Compare Exponents Double-Precision
5275 (define_expand "xscmpexpdp_<code>"
5279 [(match_operand:DF 1 "vsx_register_operand" "wa")
5280 (match_operand:DF 2 "vsx_register_operand" "wa")]
5281 UNSPEC_VSX_SCMPEXPDP)
5283 (set (match_operand:SI 0 "register_operand" "=r")
5284 (CMP_TEST:SI (match_dup 3)
5288 if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
5290 emit_move_insn (operands[0], const0_rtx);
5294 operands[3] = gen_reg_rtx (CCFPmode);
5297 (define_insn "*xscmpexpdp"
5298 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
5300 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
5301 (match_operand:DF 2 "vsx_register_operand" "wa")]
5302 UNSPEC_VSX_SCMPEXPDP)
5303 (match_operand:SI 3 "zero_constant" "j")))]
5305 "xscmpexpdp %0,%x1,%x2"
5306 [(set_attr "type" "fpcompare")])
5308 ;; VSX Scalar Compare Exponents Quad-Precision
5309 (define_expand "xscmpexpqp_<code>_<mode>"
5313 [(match_operand:IEEE128 1 "vsx_register_operand" "v")
5314 (match_operand:IEEE128 2 "vsx_register_operand" "v")]
5315 UNSPEC_VSX_SCMPEXPQP)
5317 (set (match_operand:SI 0 "register_operand" "=r")
5318 (CMP_TEST:SI (match_dup 3)
5322 if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
5324 emit_move_insn (operands[0], const0_rtx);
5328 operands[3] = gen_reg_rtx (CCFPmode);
5331 (define_insn "*xscmpexpqp"
5332 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
5334 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5335 (match_operand:IEEE128 2 "altivec_register_operand" "v")]
5336 UNSPEC_VSX_SCMPEXPQP)
5337 (match_operand:SI 3 "zero_constant" "j")))]
5339 "xscmpexpqp %0,%1,%2"
5340 [(set_attr "type" "fpcompare")])
5342 ;; VSX Scalar Test Data Class Quad-Precision
5343 ;; (Expansion for scalar_test_data_class (__ieee128, int))
5344 ;; (Has the side effect of setting the lt bit if operand 1 is negative,
5345 ;; setting the eq bit if any of the conditions tested by operand 2
5346 ;; are satisfied, and clearing the gt and unordered bits to zero.)
5347 (define_expand "xststdcqp_<mode>"
5351 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5352 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5355 (set (match_operand:SI 0 "register_operand" "=r")
5356 (eq:SI (match_dup 3)
5360 operands[3] = gen_reg_rtx (CCFPmode);
5363 ;; VSX Scalar Test Data Class Double- and Single-Precision
5364 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
5365 ;; if any of the conditions tested by operand 2 are satisfied.
5366 ;; The gt and unordered bits are cleared to zero.)
5367 (define_expand "xststdc<sd>p"
5371 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5372 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5375 (set (match_operand:SI 0 "register_operand" "=r")
5376 (eq:SI (match_dup 3)
5380 operands[3] = gen_reg_rtx (CCFPmode);
5381 operands[4] = CONST0_RTX (SImode);
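;; A hedged usage sketch (scalar_test_data_class per the GCC PowerPC
;; documentation; the 0x40 mask is purely illustrative and does not assert
;; which of the seven condition bits it is):
;;   int is_special (double d)
;;   {
;;     return scalar_test_data_class (d, 0x40);
;;   }
;; The insn sets a CCFP register and the expander above reads its eq bit.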
5384 ;; The VSX Scalar Test Negative Quad-Precision
5385 (define_expand "xststdcnegqp_<mode>"
5389 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5393 (set (match_operand:SI 0 "register_operand" "=r")
5394 (lt:SI (match_dup 2)
5398 operands[2] = gen_reg_rtx (CCFPmode);
5401 ;; The VSX Scalar Test Negative Double- and Single-Precision
5402 (define_expand "xststdcneg<sd>p"
5406 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5410 (set (match_operand:SI 0 "register_operand" "=r")
5411 (lt:SI (match_dup 2)
5415 operands[2] = gen_reg_rtx (CCFPmode);
5416 operands[3] = CONST0_RTX (SImode);
5419 (define_insn "*xststdcqp_<mode>"
5420 [(set (match_operand:CCFP 0 "" "=y")
5423 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5424 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5428 "xststdcqp %0,%1,%2"
5429 [(set_attr "type" "fpcompare")])
5431 (define_insn "*xststdc<sd>p"
5432 [(set (match_operand:CCFP 0 "" "=y")
5434 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5435 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5437 (match_operand:SI 3 "zero_constant" "j")))]
5439 "xststdc<sd>p %0,%x1,%2"
5440 [(set_attr "type" "fpcompare")])
5442 ;; VSX Vector Extract Exponent Double and Single Precision
5443 (define_insn "xvxexp<sd>p"
5444 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5446 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5449 "xvxexp<sd>p %x0,%x1"
5450 [(set_attr "type" "vecsimple")])
5452 ;; VSX Vector Extract Significand Double and Single Precision
5453 (define_insn "xvxsig<sd>p"
5454 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5456 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5459 "xvxsig<sd>p %x0,%x1"
5460 [(set_attr "type" "vecsimple")])
5462 ;; VSX Vector Insert Exponent Double and Single Precision
5463 (define_insn "xviexp<sd>p"
5464 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5466 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5467 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
5470 "xviexp<sd>p %x0,%x1,%x2"
5471 [(set_attr "type" "vecsimple")])
5473 ;; VSX Vector Test Data Class Double and Single Precision
5474 ;; The corresponding elements of the result vector are all ones
5475 ;; if any of the conditions tested by operand 3 are satisfied.
5476 (define_insn "xvtstdc<sd>p"
5477 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
5479 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5480 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5481 UNSPEC_VSX_VTSTDC))]
5483 "xvtstdc<sd>p %x0,%x1,%2"
5484 [(set_attr "type" "vecsimple")])
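;; A hedged usage sketch of the vector form (vec_test_data_class as described
;; in the GCC PowerPC documentation; the mask value is illustrative only):
;;   vector bool int classify (vector float v)
;;   {
;;     return vec_test_data_class (v, 0x40);
;;   }
;; Each result element is all ones when any tested condition holds for the
;; corresponding input element, matching the comment above.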
5486 ;; ISA 3.0 String Operations Support
5488 ;; Compare vectors producing a vector result and a predicate, setting CR6
5489 ;; to indicate a combined status. This pattern matches v16qi, v8hi, and
5490 ;; v4si modes. It does not match v2df, v4sf, or v2di modes; there is no
5491 ;; need to match those modes because they are expanded
5492 ;; to use Power8 instructions.
5493 (define_insn "*vsx_ne_<mode>_p"
5494 [(set (reg:CC CR6_REGNO)
5496 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
5497 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
5499 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
5500 (ne:VSX_EXTRACT_I (match_dup 1)
5503 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5504 [(set_attr "type" "vecsimple")])
5506 (define_insn "*vector_nez_<mode>_p"
5507 [(set (reg:CC CR6_REGNO)
5508 (unspec:CC [(unspec:VI
5509 [(match_operand:VI 1 "gpc_reg_operand" "v")
5510 (match_operand:VI 2 "gpc_reg_operand" "v")]
5513 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
5514 (unspec:VI [(match_dup 1)
5518 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5519 [(set_attr "type" "vecsimple")])
5521 ;; Return first position of match between vectors using natural order
5522 ;; for both LE and BE execution modes.
5523 (define_expand "first_match_index_<mode>"
5524 [(match_operand:SI 0 "register_operand")
5525 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5526 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5527 UNSPEC_VSX_FIRST_MATCH_INDEX)]
5532 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5533 rtx not_result = gen_reg_rtx (<MODE>mode);
5535 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5537 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
5539 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5541 if (<MODE>mode == V16QImode)
5543 if (!BYTES_BIG_ENDIAN)
5544 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
5546 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
5550 rtx tmp = gen_reg_rtx (SImode);
5551 if (!BYTES_BIG_ENDIAN)
5552 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
5554 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
5555 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
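;; A hedged usage sketch (the vec_first_match_index built-in name is taken
;; from the Power vector intrinsics documentation and is an assumption here):
;;   unsigned int first_eq (vector unsigned char a, vector unsigned char b)
;;   {
;;     return vec_first_match_index (a, b);
;;   }
;; The result is the index, in natural element order, of the first position
;; where a and b are equal, or the number of elements if there is no match.
;; The final shift by sh converts the byte count returned by vclzlsbb or
;; vctzlsbb into an element index for the halfword and word variants.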
5560 ;; Return first position of match between vectors or end of string (EOS) using
5561 ;; natural element order for both LE and BE execution modes.
5562 (define_expand "first_match_or_eos_index_<mode>"
5563 [(match_operand:SI 0 "register_operand")
5564 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5565 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5566 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
5570 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5571 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5572 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5573 rtx and_result = gen_reg_rtx (<MODE>mode);
5574 rtx result = gen_reg_rtx (<MODE>mode);
5575 rtx vzero = gen_reg_rtx (<MODE>mode);
5577 /* Vector with zeros in elements that correspond to zeros in operands. */
5578 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5579 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5580 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5581 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5583 /* Vector with ones in elements that do not match. */
5584 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5587 /* Create vector with ones in elements where there was a zero in one of
5588 the source elements or where the elements match. */
5589 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
5590 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5592 if (<MODE>mode == V16QImode)
5594 if (!BYTES_BIG_ENDIAN)
5595 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5597 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5601 rtx tmp = gen_reg_rtx (SImode);
5602 if (!BYTES_BIG_ENDIAN)
5603 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5605 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5606 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5611 ;; Return first position of mismatch between vectors using natural
5612 ;; element order for both LE and BE execution modes.
5613 (define_expand "first_mismatch_index_<mode>"
5614 [(match_operand:SI 0 "register_operand")
5615 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5616 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5617 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
5621 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5623 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5625 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5627 if (<MODE>mode == V16QImode)
5629 if (!BYTES_BIG_ENDIAN)
5630 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
5632 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
5636 rtx tmp = gen_reg_rtx (SImode);
5637 if (!BYTES_BIG_ENDIAN)
5638 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
5640 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
5641 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5646 ;; Return first position of mismatch between vectors or end of string (EOS)
5647 ;; using natural element order for both LE and BE execution modes.
5648 (define_expand "first_mismatch_or_eos_index_<mode>"
5649 [(match_operand:SI 0 "register_operand")
5650 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5651 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5652 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
5656 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5657 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5658 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5659 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
5660 rtx and_result = gen_reg_rtx (<MODE>mode);
5661 rtx result = gen_reg_rtx (<MODE>mode);
5662 rtx vzero = gen_reg_rtx (<MODE>mode);
5664 /* Vector with zeros in elements that correspond to zeros in operands. */
5665 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5667 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5668 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5669 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5671 /* Vector with ones in elements that match. */
5672 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5674 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
5676 /* Create vector with ones in elements where there was a zero in one of
5677 the source elements or where the elements did not match. */
5678 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
5679 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5681 if (<MODE>mode == V16QImode)
5683 if (!BYTES_BIG_ENDIAN)
5684 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5686 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5690 rtx tmp = gen_reg_rtx (SImode);
5691 if (!BYTES_BIG_ENDIAN)
5692 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5694 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5695 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5700 ;; Load VSX Vector with Length
5701 (define_expand "lxvl"
5703 (ashift:DI (match_operand:DI 2 "register_operand")
5705 (set (match_operand:V16QI 0 "vsx_register_operand")
5707 [(match_operand:DI 1 "gpc_reg_operand")
5708 (mem:V16QI (match_dup 1))
5711 "TARGET_P9_VECTOR && TARGET_64BIT"
5713 operands[3] = gen_reg_rtx (DImode);
5716 (define_insn "*lxvl"
5717 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5719 [(match_operand:DI 1 "gpc_reg_operand" "b")
5720 (mem:V16QI (match_dup 1))
5721 (match_operand:DI 2 "register_operand" "r")]
5723 "TARGET_P9_VECTOR && TARGET_64BIT"
5725 [(set_attr "type" "vecload")])
5727 (define_insn "lxvll"
5728 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5729 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
5730 (mem:V16QI (match_dup 1))
5731 (match_operand:DI 2 "register_operand" "r")]
5735 [(set_attr "type" "vecload")])
5737 ;; Expand for builtin xl_len_r
5738 (define_expand "xl_len_r"
5739 [(match_operand:V16QI 0 "vsx_register_operand")
5740 (match_operand:DI 1 "register_operand")
5741 (match_operand:DI 2 "register_operand")]
5744 rtx shift_mask = gen_reg_rtx (V16QImode);
5745 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5746 rtx tmp = gen_reg_rtx (DImode);
5748 emit_insn (gen_altivec_lvsl_reg_di (shift_mask, operands[2]));
5749 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5750 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
5751 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
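;; Usage sketch: on 64-bit ISA 3.0 targets lxvl/lxvll back the vec_xl_len
;; and vec_xl_len_r intrinsics from altivec.h.  The sketch below loads only
;; the first n bytes (n <= 16) and never touches memory past p + n; the
;; names are made up for illustration.
;;
;;   #include <altivec.h>
;;   #include <stddef.h>
;;
;;   vector unsigned char
;;   load_partial (unsigned char *p, size_t n)
;;   {
;;     return vec_xl_len (p, n);         /* expands through lxvl  */
;;   }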
5756 (define_insn "stxvll"
5757 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5758 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5759 (mem:V16QI (match_dup 1))
5760 (match_operand:DI 2 "register_operand" "r")]
5764 [(set_attr "type" "vecstore")])
5766 ;; Store VSX Vector with Length
5767 (define_expand "stxvl"
5769 (ashift:DI (match_operand:DI 2 "register_operand")
5771 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5773 [(match_operand:V16QI 0 "vsx_register_operand")
5774 (mem:V16QI (match_dup 1))
5777 "TARGET_P9_VECTOR && TARGET_64BIT"
5779 operands[3] = gen_reg_rtx (DImode);
5782 ;; Define optab for vector access with length vectorization exploitation.
5783 (define_expand "len_load_v16qi"
5784 [(match_operand:V16QI 0 "vlogical_operand")
5785 (match_operand:V16QI 1 "memory_operand")
5786 (match_operand:QI 2 "gpc_reg_operand")
5787 (match_operand:QI 3 "zero_constant")]
5788 "TARGET_P9_VECTOR && TARGET_64BIT"
5790 rtx mem = XEXP (operands[1], 0);
5791 mem = force_reg (DImode, mem);
5792 rtx len = gen_lowpart (DImode, operands[2]);
5793 emit_insn (gen_lxvl (operands[0], mem, len));
5797 (define_expand "len_store_v16qi"
5798 [(match_operand:V16QI 0 "memory_operand")
5799 (match_operand:V16QI 1 "vlogical_operand")
5800 (match_operand:QI 2 "gpc_reg_operand")
5801 (match_operand:QI 3 "zero_constant")
5803 "TARGET_P9_VECTOR && TARGET_64BIT"
5805 rtx mem = XEXP (operands[0], 0);
5806 mem = force_reg (DImode, mem);
5807 rtx len = gen_lowpart (DImode, operands[2]);
5808 emit_insn (gen_stxvl (operands[1], mem, len));
5812 (define_insn "*stxvl"
5813 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5815 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5816 (mem:V16QI (match_dup 1))
5817 (match_operand:DI 2 "register_operand" "r")]
5819 "TARGET_P9_VECTOR && TARGET_64BIT"
5821 [(set_attr "type" "vecstore")])
5823 ;; Expand for builtin xst_len_r
5824 (define_expand "xst_len_r"
5825 [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5826 (match_operand:DI 1 "register_operand" "b")
5827 (match_operand:DI 2 "register_operand" "r")]
5830 rtx shift_mask = gen_reg_rtx (V16QImode);
5831 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5832 rtx tmp = gen_reg_rtx (DImode);
5834 emit_insn (gen_altivec_lvsr_reg_di (shift_mask, operands[2]));
5835 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5837 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5838 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
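;; Usage sketch: the store-with-length patterns back vec_xst_len (and the
;; byte-reversed vec_xst_len_r) from altivec.h, and the len_load/len_store
;; expanders in this section let the vectorizer use the same instructions
;; for loop tails.  The names below are illustrative only.
;;
;;   #include <altivec.h>
;;   #include <stddef.h>
;;
;;   void
;;   store_partial (vector unsigned char v, unsigned char *p, size_t n)
;;   {
;;     vec_xst_len (v, p, n);            /* stores exactly n bytes, n <= 16  */
;;   }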
5842 ;; Vector Compare Not Equal Byte (specified/not+eq:)
5843 (define_insn "vcmpneb"
5844 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5846 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5847 (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5850 [(set_attr "type" "vecsimple")])
5852 ;; Vector Compare Not Equal v1ti (specified/not+eq:)
5853 (define_expand "vcmpnet"
5854 [(set (match_operand:V1TI 0 "altivec_register_operand")
5856 (eq:V1TI (match_operand:V1TI 1 "altivec_register_operand")
5857 (match_operand:V1TI 2 "altivec_register_operand"))))]
5860 emit_insn (gen_eqvv1ti3 (operands[0], operands[1], operands[2]));
5861 emit_insn (gen_one_cmplv1ti2 (operands[0], operands[0]));
5865 ;; Vector Compare Not Equal or Zero Byte
5866 (define_insn "vcmpnezb"
5867 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5869 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5870 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5874 [(set_attr "type" "vecsimple")])
5876 ;; Vector Compare Not Equal or Zero Byte predicate or record-form
5877 (define_insn "vcmpnezb_p"
5878 [(set (reg:CC CR6_REGNO)
5880 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5881 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5883 (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5889 "vcmpnezb. %0,%1,%2"
5890 [(set_attr "type" "vecsimple")])
5892 ;; Vector Compare Not Equal Half Word (specified/not+eq:)
5893 (define_insn "vcmpneh"
5894 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5896 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5897 (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5900 [(set_attr "type" "vecsimple")])
5902 ;; Vector Compare Not Equal or Zero Half Word
5903 (define_insn "vcmpnezh"
5904 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5905 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5906 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5910 [(set_attr "type" "vecsimple")])
5912 ;; Vector Compare Not Equal Word (specified/not+eq:)
5913 (define_insn "vcmpnew"
5914 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5916 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5917 (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5920 [(set_attr "type" "vecsimple")])
5922 ;; Vector Compare Not Equal or Zero Word
5923 (define_insn "vcmpnezw"
5924 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5925 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5926 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5930 [(set_attr "type" "vecsimple")])
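;; Usage sketch for the compare-not-equal family: vec_cmpne and vec_cmpnez
;; from altivec.h map onto the vcmpne*/vcmpnez* patterns above on ISA 3.0.
;; The names below are made up for illustration.
;;
;;   #include <altivec.h>
;;
;;   vector bool int
;;   differs (vector signed int a, vector signed int b)
;;   {
;;     return vec_cmpne (a, b);          /* vcmpnew  */
;;   }
;;
;;   vector bool char
;;   differs_or_nul (vector unsigned char a, vector unsigned char b)
;;   {
;;     return vec_cmpnez (a, b);         /* vcmpnezb  */
;;   }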
5932 ;; Vector Count Leading/Trailing Zero Least-Significant Bits Byte
5933 (define_insn "*vc<vczlsbb_char>zlsbb_zext_<mode>"
5934 [(set (match_operand:DI 0 "register_operand" "=r")
5937 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5940 "vc<vczlsbb_char>zlsbb %0,%1"
5941 [(set_attr "type" "vecsimple")])
5943 (define_insn "vc<vczlsbb_char>zlsbb_<mode>"
5944 [(set (match_operand:SI 0 "register_operand" "=r")
5946 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5949 "vc<vczlsbb_char>zlsbb %0,%1"
5950 [(set_attr "type" "vecsimple")])
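;; Usage sketch: these count patterns are reachable via vec_cntlz_lsbb and
;; vec_cnttz_lsbb from altivec.h (ISA 3.0), which count leading or trailing
;; byte elements whose least-significant bit is zero.  The names below are
;; illustrative only.
;;
;;   #include <altivec.h>
;;
;;   int
;;   leading_clear_lsbs (vector unsigned char v)
;;   {
;;     return vec_cntlz_lsbb (v);        /* vclzlsbb  */
;;   }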
5952 ;; Vector Extract Unsigned Byte Left-Indexed
5953 (define_insn "vextublx"
5954 [(set (match_operand:SI 0 "register_operand" "=r")
5956 [(match_operand:SI 1 "register_operand" "r")
5957 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5961 [(set_attr "type" "vecsimple")])
5963 ;; Vector Extract Unsigned Byte Right-Indexed
5964 (define_insn "vextubrx"
5965 [(set (match_operand:SI 0 "register_operand" "=r")
5967 [(match_operand:SI 1 "register_operand" "r")
5968 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5972 [(set_attr "type" "vecsimple")])
5974 ;; Vector Extract Unsigned Half Word Left-Indexed
5975 (define_insn "vextuhlx"
5976 [(set (match_operand:SI 0 "register_operand" "=r")
5978 [(match_operand:SI 1 "register_operand" "r")
5979 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5983 [(set_attr "type" "vecsimple")])
5985 ;; Vector Extract Unsigned Half Word Right-Indexed
5986 (define_insn "vextuhrx"
5987 [(set (match_operand:SI 0 "register_operand" "=r")
5989 [(match_operand:SI 1 "register_operand" "r")
5990 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5994 [(set_attr "type" "vecsimple")])
5996 ;; Vector Extract Unsigned Word Left-Indexed
5997 (define_insn "vextuwlx"
5998 [(set (match_operand:SI 0 "register_operand" "=r")
6000 [(match_operand:SI 1 "register_operand" "r")
6001 (match_operand:V4SI 2 "altivec_register_operand" "v")]
6005 [(set_attr "type" "vecsimple")])
6007 ;; Vector Extract Unsigned Word Right-Indexed
6008 (define_insn "vextuwrx"
6009 [(set (match_operand:SI 0 "register_operand" "=r")
6011 [(match_operand:SI 1 "register_operand" "r")
6012 (match_operand:V4SI 2 "altivec_register_operand" "v")]
6016 [(set_attr "type" "vecsimple")])
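;; Usage sketch: the left/right-indexed extracts above back the vec_xlx and
;; vec_xrx intrinsics from altivec.h (ISA 3.0), which pull a single element
;; out of a vector at a run-time byte offset.  The names below are made up
;; for illustration.
;;
;;   #include <altivec.h>
;;
;;   unsigned char
;;   byte_at (unsigned int i, vector unsigned char v)
;;   {
;;     return vec_xlx (i, v);            /* vextublx  */
;;   }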
6018 ;; Vector insert/extract word at arbitrary byte values. Note, the little
6019 ;; endian version needs to adjust the byte number, and the V4SI element used by insert4b.
6021 (define_insn "extract4b"
6022 [(set (match_operand:V2DI 0 "vsx_register_operand")
6023 (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
6024 (match_operand:QI 2 "const_0_to_12_operand" "n")]
6025 UNSPEC_XXEXTRACTUW))]
6028 if (!BYTES_BIG_ENDIAN)
6029 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
6031 return "xxextractuw %x0,%x1,%2";
6034 (define_expand "insert4b"
6035 [(set (match_operand:V16QI 0 "vsx_register_operand")
6036 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
6037 (match_operand:V16QI 2 "vsx_register_operand")
6038 (match_operand:QI 3 "const_0_to_12_operand")]
6042 if (!BYTES_BIG_ENDIAN)
6044 rtx op1 = operands[1];
6045 rtx v4si_tmp = gen_reg_rtx (V4SImode);
6046 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
6047 operands[1] = v4si_tmp;
6048 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
6052 (define_insn "*insert4b_internal"
6053 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
6054 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
6055 (match_operand:V16QI 2 "vsx_register_operand" "0")
6056 (match_operand:QI 3 "const_0_to_12_operand" "n")]
6059 "xxinsertw %x0,%x1,%3"
6060 [(set_attr "type" "vecperm")])
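;; Usage sketch: extract4b/insert4b are the expanders behind the
;; vec_extract4b and vec_insert4b intrinsics from altivec.h (ISA 3.0),
;; which move a 4-byte field at a constant byte offset in the 0..12 range.
;; The names below are illustrative only.
;;
;;   #include <altivec.h>
;;
;;   vector unsigned long long
;;   grab_word (vector unsigned char v)
;;   {
;;     return vec_extract4b (v, 4);      /* xxextractuw  */
;;   }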
6063 ;; Extract four float32 values from the left four elements of an
6064 ;; eight-element vector of float16 values.
6065 (define_expand "vextract_fp_from_shorth"
6066 [(set (match_operand:V4SF 0 "register_operand" "=wa")
6067 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
6068 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
6072 int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
6073 int vals_be[16] = {0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7};
6076 rtx mask = gen_reg_rtx (V16QImode);
6077 rtx tmp = gen_reg_rtx (V16QImode);
6080 for (i = 0; i < 16; i++)
6081 if (!BYTES_BIG_ENDIAN)
6082 rvals[i] = GEN_INT (vals_le[i]);
6084 rvals[i] = GEN_INT (vals_be[i]);
6086 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
6087 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
6088 src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
6089 conversion instruction. */
6090 v = gen_rtvec_v (16, rvals);
6091 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
6092 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
6093 operands[1], mask));
6094 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
6098 ;; Extract four float32 values from the right four elements of an
6099 ;; eight-element vector of float16 values.
6100 (define_expand "vextract_fp_from_shortl"
6101 [(set (match_operand:V4SF 0 "register_operand" "=wa")
6102 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
6103 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
6106 int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
6107 int vals_be[16] = {0, 0, 8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15};
6111 rtx mask = gen_reg_rtx (V16QImode);
6112 rtx tmp = gen_reg_rtx (V16QImode);
6115 for (i = 0; i < 16; i++)
6116 if (!BYTES_BIG_ENDIAN)
6117 rvals[i] = GEN_INT (vals_le[i]);
6119 rvals[i] = GEN_INT (vals_be[i]);
6121 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
6122 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
6123 src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
6124 conversion instruction. */
6125 v = gen_rtvec_v (16, rvals);
6126 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
6127 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
6128 operands[1], mask));
6129 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
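;; Usage sketch: these two expanders are assumed to back the
;; vec_extract_fp32_from_shorth and vec_extract_fp32_from_shortl
;; intrinsics, which widen four IEEE half-precision values to float.  The
;; names below are made up for illustration.
;;
;;   #include <altivec.h>
;;
;;   vector float
;;   widen_low_halves (vector unsigned short v)
;;   {
;;     return vec_extract_fp32_from_shortl (v);   /* xvcvhpsp after xxperm  */
;;   }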
6133 ;; Support for ISA 3.0 vector byte reverse
6135 ;; Swap all bytes within a vector
6136 (define_insn "p9_xxbrq_v1ti"
6137 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
6138 (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
6141 [(set_attr "type" "vecperm")])
6143 (define_expand "p9_xxbrq_v16qi"
6144 [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
6145 (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
6148 rtx op0 = gen_reg_rtx (V1TImode);
6149 rtx op1 = gen_lowpart (V1TImode, operands[1]);
6150 emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
6151 emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
6155 ;; Swap all bytes in each 64-bit element
6156 (define_insn "p9_xxbrd_v2di"
6157 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
6158 (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
6161 [(set_attr "type" "vecperm")])
6163 (define_expand "p9_xxbrd_v2df"
6164 [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
6165 (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
6168 rtx op0 = gen_reg_rtx (V2DImode);
6169 rtx op1 = gen_lowpart (V2DImode, operands[1]);
6170 emit_insn (gen_p9_xxbrd_v2di (op0, op1));
6171 emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
6175 ;; Swap all bytes in each 32-bit element
6176 (define_insn "p9_xxbrw_v4si"
6177 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
6178 (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
6181 [(set_attr "type" "vecperm")])
6183 (define_expand "p9_xxbrw_v4sf"
6184 [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
6185 (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
6188 rtx op0 = gen_reg_rtx (V4SImode);
6189 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6190 emit_insn (gen_p9_xxbrw_v4si (op0, op1));
6191 emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
6195 ;; Swap all bytes in each element of vector
6196 (define_expand "revb_<mode>"
6197 [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
6198 (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
6201 if (TARGET_P9_VECTOR)
6202 emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
6205 if (<MODE>mode == V8HImode)
6207 rtx splt = gen_reg_rtx (V8HImode);
6208 emit_insn (gen_altivec_vspltish (splt, GEN_INT (8)));
6209 emit_insn (gen_altivec_vrlh (operands[0], operands[1], splt));
6213 /* Want to have the elements in reverse order relative
6214 to the endian mode in use, i.e. in LE mode, put elements in BE order.  */
6216 rtx sel = swap_endian_selector_for_mode (<MODE>mode);
6217 emit_insn (gen_altivec_vperm_<mode>_direct (operands[0], operands[1],
6225 ;; Reversing bytes in vector char is just a NOP.
6226 (define_expand "revb_v16qi"
6227 [(set (match_operand:V16QI 0 "vsx_register_operand")
6228 (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
6231 emit_move_insn (operands[0], operands[1]);
6235 ;; Swap all bytes in each 16-bit element
6236 (define_insn "p9_xxbrh_v8hi"
6237 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
6238 (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
6241 [(set_attr "type" "vecperm")])
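;; Usage sketch: the revb_<mode> expanders above implement the vec_revb
;; intrinsic from altivec.h, which byte-swaps every element and is the
;; usual way to change the endianness of vector data held in registers.
;; The names below are illustrative only.
;;
;;   #include <altivec.h>
;;
;;   vector unsigned int
;;   byteswap_words (vector unsigned int v)
;;   {
;;     return vec_revb (v);              /* xxbrw on ISA 3.0  */
;;   }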
6244 ;; Operand numbers for the following peephole2
6246 [(SFBOOL_TMP_GPR 0) ;; GPR temporary
6247 (SFBOOL_TMP_VSX 1) ;; vector temporary
6248 (SFBOOL_MFVSR_D 2) ;; move to gpr dest
6249 (SFBOOL_MFVSR_A 3) ;; move to gpr src
6250 (SFBOOL_BOOL_D 4) ;; and/ior/xor dest
6251 (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1
6252 (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg2
6253 (SFBOOL_SHL_D 7) ;; shift left dest
6254 (SFBOOL_SHL_A 8) ;; shift left arg
6255 (SFBOOL_MTVSR_D 9) ;; move to vector dest
6256 (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode
6257 (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode
6258 (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode
6259 (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSR_D as V4SFmode
6261 ;; Attempt to optimize some common GLIBC operations using logical operations to
6262 ;; pick apart SFmode operations. For example, there is code from e_powf.c
6263 ;; after macro expansion that looks like:
;;	typedef union { float value; unsigned int word; } ieee_float_shape_type;
;;
;;	do {
;;	  ieee_float_shape_type gf_u;
;;	  gf_u.value = (t1);
;;	  (is) = gf_u.word;
;;	} while (0);
;;
;;	do {
;;	  ieee_float_shape_type sf_u;
;;	  sf_u.word = (is & 0xfffff000);
;;	  (t1) = sf_u.value;
;;	} while (0);
6286 ;; This would result in two direct move operations (convert to memory format,
6287 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
6288 ;; scalar format). With this peephole, we eliminate the direct move to the
6289 ;; GPR, and instead move the integer mask value to the vector register after a
6290 ;; shift and do the VSX logical operation.
6292 ;; The insns for dealing with SFmode in GPR registers look like:
6293 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
6295 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
6297 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
6299 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
6301 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
6303 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
6306 [(match_scratch:DI SFBOOL_TMP_GPR "r")
6307 (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
6309 ;; MFVSRWZ (aka zero_extend)
6310 (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
6312 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
6314 ;; AND/IOR/XOR operation on int
6315 (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
6316 (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
6317 (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
6320 (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
6321 (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
6325 (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
6326 (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
6328 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
6329 /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
6330 to compare registers, when the mode is different. */
6331 && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
6332 && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
6333 && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
6334 && (REG_P (operands[SFBOOL_BOOL_A2])
6335 || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
6336 && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
6337 || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
6338 && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
6339 || (REG_P (operands[SFBOOL_BOOL_A2])
6340 && REGNO (operands[SFBOOL_MFVSR_D])
6341 == REGNO (operands[SFBOOL_BOOL_A2])))
6342 && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
6343 && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
6344 || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
6345 && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
6346 [(set (match_dup SFBOOL_TMP_GPR)
6347 (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
6350 (set (match_dup SFBOOL_TMP_VSX_DI)
6351 (match_dup SFBOOL_TMP_GPR))
6353 (set (match_dup SFBOOL_MTVSR_D_V4SF)
6354 (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
6355 (match_dup SFBOOL_TMP_VSX)))]
6357 rtx bool_a1 = operands[SFBOOL_BOOL_A1];
6358 rtx bool_a2 = operands[SFBOOL_BOOL_A2];
6359 int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
6360 int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
6361 int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
6362 int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
6364 if (CONST_INT_P (bool_a2))
6366 rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
6367 emit_move_insn (tmp_gpr, bool_a2);
6368 operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
6372 int regno_bool_a1 = REGNO (bool_a1);
6373 int regno_bool_a2 = REGNO (bool_a2);
6374 int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
6375 ? regno_bool_a2 : regno_bool_a1);
6376 operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
6379 operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
6380 operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
6381 operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
6384 ;; Support signed/unsigned long long to float conversion vectorization.
6385 ;; Note that any_float (pc) here is just for code attribute <su>.
6386 (define_expand "vec_pack<su>_float_v2di"
6387 [(match_operand:V4SF 0 "vfloat_operand")
6388 (match_operand:V2DI 1 "vint_operand")
6389 (match_operand:V2DI 2 "vint_operand")
6393 rtx r1 = gen_reg_rtx (V4SFmode);
6394 rtx r2 = gen_reg_rtx (V4SFmode);
6395 emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
6396 emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
6397 rs6000_expand_extract_even (operands[0], r1, r2);
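;; Sketch of source code that can be vectorized through
;; vec_pack<su>_float_v2di: narrowing 64-bit integers to float.  Whether
;; the loop below actually uses this pattern depends on the usual target
;; flags and cost-model checks; the names are made up for illustration.
;;
;;   void
;;   narrow (float *restrict out, const long long *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = (float) in[i];
;;   }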
6401 ;; Support float to signed/unsigned long long conversion vectorization.
6402 ;; Note that any_fix (pc) here is just for code attribute <su>.
6403 (define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
6404 [(match_operand:V2DI 0 "vint_operand")
6405 (match_operand:V4SF 1 "vfloat_operand")
6409 rtx reg = gen_reg_rtx (V4SFmode);
6410 rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
6411 emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
6415 ;; Note that any_fix (pc) here is just for code attribute <su>.
6416 (define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
6417 [(match_operand:V2DI 0 "vint_operand")
6418 (match_operand:V4SF 1 "vfloat_operand")
6422 rtx reg = gen_reg_rtx (V4SFmode);
6423 rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
6424 emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
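;; Sketch of a loop that can be vectorized through the
;; vec_unpack_<su>fix_trunc_hi/lo_v4sf patterns above (interleave, then
;; xvcvspsxds or xvcvspuxds); again, the choice is up to the vectorizer.
;; The names are made up for illustration.
;;
;;   void
;;   widen_trunc (long long *restrict out, const float *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = (long long) in[i];
;;   }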
6428 (define_insn "vsx_<xvcvbf16>"
6429 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
6430 (unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
6433 "<xvcvbf16> %x0,%x1"
6434 [(set_attr "type" "vecfloat")])
6436 (define_insn "vec_mtvsrbmi"
6437 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
6438 (unspec:V16QI [(match_operand:QI 1 "u6bit_cint_operand" "n")]
6444 (define_insn "vec_mtvsr_<mode>"
6445 [(set (match_operand:VSX_MM 0 "altivec_register_operand" "=v")
6446 (unspec:VSX_MM [(match_operand:DI 1 "gpc_reg_operand" "r")]
6450 [(set_attr "type" "vecsimple")])
6452 (define_insn "vec_cntmb_<mode>"
6453 [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
6454 (unspec:DI [(match_operand:VSX_MM4 1 "altivec_register_operand" "v")
6455 (match_operand:QI 2 "const_0_to_1_operand" "n")]
6458 "vcntmb<wd> %0,%1,%2"
6459 [(set_attr "type" "vecsimple")])
6461 (define_insn "vec_extract_<mode>"
6462 [(set (match_operand:SI 0 "register_operand" "=r")
6463 (unspec:SI [(match_operand:VSX_MM 1 "altivec_register_operand" "v")]
6466 "vextract<wd>m %0,%1"
6467 [(set_attr "type" "vecsimple")])
6469 (define_insn "vec_expand_<mode>"
6470 [(set (match_operand:VSX_MM 0 "vsx_register_operand" "=v")
6471 (unspec:VSX_MM [(match_operand:VSX_MM 1 "vsx_register_operand" "v")]
6474 "vexpand<wd>m %0,%1"
6475 [(set_attr "type" "vecsimple")])
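;; Usage sketch for the ISA 3.1 mask operations above: vec_genbm, vec_cntm,
;; vec_extractm and vec_expandm from altivec.h build, count, extract and
;; expand per-element bit masks.  The names below are made up for
;; illustration.
;;
;;   #include <altivec.h>
;;
;;   vector unsigned char
;;   mask_from_bits (unsigned long long bits)
;;   {
;;     return vec_genbm (bits);          /* mtvsrbm  */
;;   }
;;
;;   unsigned long long
;;   count_selected (vector unsigned char v)
;;   {
;;     /* Count byte elements whose mask (most-significant) bit is 1.  */
;;     return vec_cntm (v, 1);           /* vcntmbb  */
;;   }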
6477 (define_insn "dives_<mode>"
6478 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6479 (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v")
6480 (match_operand:VIlong 2 "vsx_register_operand" "v")]
6483 "vdives<wd> %0,%1,%2"
6484 [(set_attr "type" "vecdiv")
6485 (set_attr "size" "<bits>")])
6487 (define_insn "diveu_<mode>"
6488 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6489 (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v")
6490 (match_operand:VIlong 2 "vsx_register_operand" "v")]
6493 "vdiveu<wd> %0,%1,%2"
6494 [(set_attr "type" "vecdiv")
6495 (set_attr "size" "<bits>")])
6497 (define_insn "div<mode>3"
6498 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6499 (div:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6500 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6502 "vdivs<wd> %0,%1,%2"
6503 [(set_attr "type" "vecdiv")
6504 (set_attr "size" "<bits>")])
6506 (define_insn "udiv<mode>3"
6507 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6508 (udiv:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6509 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6511 "vdivu<wd> %0,%1,%2"
6512 [(set_attr "type" "vecdiv")
6513 (set_attr "size" "<bits>")])
6515 (define_insn "mod<mode>3"
6516 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6517 (mod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6518 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6520 "vmods<wd> %0,%1,%2"
6521 [(set_attr "type" "vecdiv")
6522 (set_attr "size" "<bits>")])
6524 (define_insn "umod<mode>3"
6525 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6526 (umod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6527 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6529 "vmodu<wd> %0,%1,%2"
6530 [(set_attr "type" "vecdiv")
6531 (set_attr "size" "<bits>")])
6533 (define_insn "smul<mode>3_highpart"
6534 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6535 (mult:VIlong (ashiftrt
6536 (match_operand:VIlong 1 "vsx_register_operand" "v")
6539 (match_operand:VIlong 2 "vsx_register_operand" "v")
6542 "vmulhs<wd> %0,%1,%2"
6543 [(set_attr "type" "veccomplex")])
6545 (define_insn "umul<mode>3_highpart"
6546 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6547 (us_mult:VIlong (ashiftrt
6548 (match_operand:VIlong 1 "vsx_register_operand" "v")
6551 (match_operand:VIlong 2 "vsx_register_operand" "v")
6554 "vmulhu<wd> %0,%1,%2"
6555 [(set_attr "type" "veccomplex")])
6557 ;; Vector multiply low double word
6558 (define_insn "mulv2di3"
6559 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
6560 (mult:V2DI (match_operand:V2DI 1 "vsx_register_operand" "v")
6561 (match_operand:V2DI 2 "vsx_register_operand" "v")))]
6564 [(set_attr "type" "veccomplex")])
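;; Usage sketch: on ISA 3.1 the divide, modulo and multiply patterns above
;; let plain C vector arithmetic and the vec_div, vec_dive, vec_mod and
;; vec_mulh intrinsics expand to single instructions.  The names below are
;; illustrative only.
;;
;;   #include <altivec.h>
;;
;;   vector signed long long
;;   muldiv (vector signed long long a, vector signed long long b)
;;   {
;;     vector signed long long q = vec_div (a, b);   /* vdivsd  */
;;     vector signed long long r = vec_mod (a, b);   /* vmodsd  */
;;     return q * r + vec_mulh (a, b);               /* vmulld, vmulhsd  */
;;   }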
6567 ;; XXSPLTIW built-in function support
6568 (define_insn "xxspltiw_v4si"
6569 [(set (match_operand:V4SI 0 "register_operand" "=wa")
6570 (unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")]
6574 [(set_attr "type" "vecperm")
6575 (set_attr "prefixed" "yes")])
6577 (define_expand "xxspltiw_v4sf"
6578 [(set (match_operand:V4SF 0 "register_operand" "=wa")
6579 (unspec:V4SF [(match_operand:SF 1 "const_double_operand" "n")]
6583 long value = rs6000_const_f32_to_i32 (operands[1]);
6584 emit_insn (gen_xxspltiw_v4sf_inst (operands[0], GEN_INT (value)));
6588 (define_insn "xxspltiw_v4sf_inst"
6589 [(set (match_operand:V4SF 0 "register_operand" "=wa")
6590 (unspec:V4SF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
6594 [(set_attr "type" "vecperm")
6595 (set_attr "prefixed" "yes")])
6597 ;; XXSPLTIDP built-in function support
6598 (define_expand "xxspltidp_v2df"
6599 [(set (match_operand:V2DF 0 "register_operand" )
6600 (unspec:V2DF [(match_operand:SF 1 "const_double_operand")]
6604 long value = rs6000_const_f32_to_i32 (operands[1]);
6605 rs6000_emit_xxspltidp_v2df (operands[0], value);
6609 (define_insn "xxspltidp_v2df_inst"
6610 [(set (match_operand:V2DF 0 "register_operand" "=wa")
6611 (unspec:V2DF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
6615 [(set_attr "type" "vecperm")
6616 (set_attr "prefixed" "yes")])
6618 ;; XXSPLTI32DX built-in function support
6619 (define_expand "xxsplti32dx_v4si"
6620 [(set (match_operand:V4SI 0 "register_operand" "=wa")
6621 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6622 (match_operand:QI 2 "u1bit_cint_operand" "n")
6623 (match_operand:SI 3 "s32bit_cint_operand" "n")]
6624 UNSPEC_XXSPLTI32DX))]
6627 int index = INTVAL (operands[2]);
6629 if (!BYTES_BIG_ENDIAN)
6632 emit_insn (gen_xxsplti32dx_v4si_inst (operands[0], operands[1],
6633 GEN_INT (index), operands[3]));
6636 [(set_attr "type" "vecperm")])
6638 (define_insn "xxsplti32dx_v4si_inst"
6639 [(set (match_operand:V4SI 0 "register_operand" "=wa")
6640 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6641 (match_operand:QI 2 "u1bit_cint_operand" "n")
6642 (match_operand:SI 3 "s32bit_cint_operand" "n")]
6643 UNSPEC_XXSPLTI32DX))]
6645 "xxsplti32dx %x0,%2,%3"
6646 [(set_attr "type" "vecperm")
6647 (set_attr "prefixed" "yes")])
6649 (define_expand "xxsplti32dx_v4sf"
6650 [(set (match_operand:V4SF 0 "register_operand" "=wa")
6651 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
6652 (match_operand:QI 2 "u1bit_cint_operand" "n")
6653 (match_operand:SF 3 "const_double_operand" "n")]
6654 UNSPEC_XXSPLTI32DX))]
6657 int index = INTVAL (operands[2]);
6658 long value = rs6000_const_f32_to_i32 (operands[3]);
6659 if (!BYTES_BIG_ENDIAN)
6662 emit_insn (gen_xxsplti32dx_v4sf_inst (operands[0], operands[1],
6663 GEN_INT (index), GEN_INT (value)));
6667 (define_insn "xxsplti32dx_v4sf_inst"
6668 [(set (match_operand:V4SF 0 "register_operand" "=wa")
6669 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
6670 (match_operand:QI 2 "u1bit_cint_operand" "n")
6671 (match_operand:SI 3 "s32bit_cint_operand" "n")]
6672 UNSPEC_XXSPLTI32DX))]
6674 "xxsplti32dx %x0,%2,%3"
6675 [(set_attr "type" "vecperm")
6676 (set_attr "prefixed" "yes")])
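;; Usage sketch for the ISA 3.1 splat-immediate support above: vec_splati,
;; vec_splatid and vec_splati_ins from altivec.h take literal constants and
;; expand to xxspltiw, xxspltidp and xxsplti32dx.  The names below are made
;; up for illustration.
;;
;;   #include <altivec.h>
;;
;;   vector signed int
;;   fours (void)
;;   {
;;     return vec_splati (4);            /* xxspltiw  */
;;   }
;;
;;   vector double
;;   halves (void)
;;   {
;;     return vec_splatid (0.5f);        /* xxspltidp  */
;;   }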
6678 ;; XXBLEND built-in function support
6679 (define_insn "xxblend_<mode>"
6680 [(set (match_operand:VM3 0 "register_operand" "=wa")
6681 (unspec:VM3 [(match_operand:VM3 1 "register_operand" "wa")
6682 (match_operand:VM3 2 "register_operand" "wa")
6683 (match_operand:VM3 3 "register_operand" "wa")]
6686 "xxblendv<VM3_char> %x0,%x1,%x2,%x3"
6687 [(set_attr "type" "vecperm")
6688 (set_attr "prefixed" "yes")])
6690 ;; XXPERMX built-in function support
6691 (define_expand "xxpermx"
6692 [(set (match_operand:V2DI 0 "register_operand" "+wa")
6693 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa")
6694 (match_operand:V2DI 2 "register_operand" "wa")
6695 (match_operand:V16QI 3 "register_operand" "wa")
6696 (match_operand:QI 4 "u8bit_cint_operand" "n")]
6700 if (BYTES_BIG_ENDIAN)
6701 emit_insn (gen_xxpermx_inst (operands[0], operands[1],
6702 operands[2], operands[3],
6706 /* Reverse value of byte element indexes by XORing with 0xFF.
6707 Reverse the 32-byte section identifier match by subtracting bits [0:2]
6708 of the element from 7.  */
6709 int value = INTVAL (operands[4]);
6710 rtx vreg = gen_reg_rtx (V16QImode);
6712 emit_insn (gen_xxspltib_v16qi (vreg, GEN_INT (-1)));
6713 emit_insn (gen_xorv16qi3 (operands[3], operands[3], vreg));
6715 emit_insn (gen_xxpermx_inst (operands[0], operands[2],
6716 operands[1], operands[3],
6722 [(set_attr "type" "vecperm")])
6724 (define_insn "xxpermx_inst"
6725 [(set (match_operand:V2DI 0 "register_operand" "+v")
6726 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")
6727 (match_operand:V2DI 2 "register_operand" "v")
6728 (match_operand:V16QI 3 "register_operand" "v")
6729 (match_operand:QI 4 "u3bit_cint_operand" "n")]
6732 "xxpermx %x0,%x1,%x2,%x3,%4"
6733 [(set_attr "type" "vecperm")
6734 (set_attr "prefixed" "yes")])
6736 ;; XXEVAL built-in function support
6737 (define_insn "xxeval"
6738 [(set (match_operand:V2DI 0 "register_operand" "=wa")
6739 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa")
6740 (match_operand:V2DI 2 "register_operand" "wa")
6741 (match_operand:V2DI 3 "register_operand" "wa")
6742 (match_operand:QI 4 "u8bit_cint_operand" "n")]
6745 "xxeval %x0,%x1,%x2,%x3,%4"
6746 [(set_attr "type" "vecperm")
6747 (set_attr "prefixed" "yes")])
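;; Usage sketch for the ISA 3.1 permute-class builtins above: vec_blendv,
;; vec_permx and vec_ternarylogic from altivec.h expand to xxblendv*,
;; xxpermx and xxeval.  The names below are made up for illustration.
;;
;;   #include <altivec.h>
;;
;;   vector unsigned int
;;   select_by_mask (vector unsigned int a, vector unsigned int b,
;;                   vector unsigned int m)
;;   {
;;     return vec_blendv (a, b, m);      /* xxblendvw  */
;;   }
;;
;;   vector unsigned long long
;;   boolfn (vector unsigned long long a, vector unsigned long long b,
;;           vector unsigned long long c)
;;   {
;;     /* Apply the 3-input boolean function selected by the truth table.  */
;;     return vec_ternarylogic (a, b, c, 0xE8);
;;   }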
6749 ;; Construct V1TI by vsx_concat_v2di
6751 [(set (match_operand:V1TI 0 "vsx_register_operand")
6753 (match_operand:TI 1 "int_reg_operand") 0 ))]
6754 "TARGET_P9_VECTOR && !reload_completed"
6757 rtx tmp1 = simplify_gen_subreg (DImode, operands[1], TImode, 0);
6758 rtx tmp2 = simplify_gen_subreg (DImode, operands[1], TImode, 8);
6759 rtx tmp3 = gen_reg_rtx (V2DImode);
6760 emit_insn (gen_vsx_concat_v2di (tmp3, tmp1, tmp2));
6761 rtx tmp4 = simplify_gen_subreg (V1TImode, tmp3, V2DImode, 0);
6762 emit_move_insn (operands[0], tmp4);
6767 (define_insn "vmsumcud"
6768 [(set (match_operand:V1TI 0 "register_operand" "+v")
6769 (unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v")
6770 (match_operand:V2DI 2 "register_operand" "v")
6771 (match_operand:V1TI 3 "register_operand" "v")]
6774 "vmsumcud %0,%1,%2,%3"
6775 [(set_attr "type" "veccomplex")]
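;; Usage sketch: vmsumcud is reachable through the vec_msumc intrinsic from
;; altivec.h (ISA 3.1), which returns the carries out of a multiply-sum of
;; unsigned doublewords into a 128-bit accumulator.  The names below are
;; illustrative only.
;;
;;   #include <altivec.h>
;;
;;   vector unsigned __int128
;;   msum_carry (vector unsigned long long a, vector unsigned long long b,
;;               vector unsigned __int128 c)
;;   {
;;     return vec_msumc (a, b, c);       /* vmsumcud  */
;;   }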
6779 [(set (match_operand:V1TI 0 "gpc_reg_operand")
6780 (match_operand:V1TI 1 "vsx_register_operand"))]
6782 && TARGET_DIRECT_MOVE_64BIT
6783 && int_reg_operand (operands[0], V1TImode)
6784 && vsx_register_operand (operands[1], V1TImode)"
6787 rtx src_op = gen_rtx_REG (V2DImode, REGNO (operands[1]));
6788 rtx dest_op0 = gen_rtx_REG (DImode, REGNO (operands[0]));
6789 rtx dest_op1 = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
6790 emit_insn (gen_vsx_extract_v2di (dest_op0, src_op, const0_rtx));
6791 emit_insn (gen_vsx_extract_v2di (dest_op1, src_op, const1_rtx));