2 ;; Copyright (C) 2009-2019 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Iterator for comparison types
22 (define_code_iterator CMP_TEST [eq lt gt unordered])
24 ;; Mode attribute for vector floate and floato conversions
25 (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
27 ;; Iterator for both scalar and vector floating point types supported by VSX
28 (define_mode_iterator VSX_B [DF V4SF V2DF])
30 ;; Iterator for the 2 64-bit vector types
31 (define_mode_iterator VSX_D [V2DF V2DI])
33 ;; Mode iterator to handle swapping words on little endian for the 128-bit
34 ;; types that go in a single vector register.
35 (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
36 (TF "FLOAT128_VECTOR_P (TFmode)")
40 ;; Iterator for 128-bit integer types that go in a single vector register.
41 (define_mode_iterator VSX_TI [TI V1TI])
43 ;; Iterator for the 2 32-bit vector types
44 (define_mode_iterator VSX_W [V4SF V4SI])
46 ;; Iterator for the DF types
47 (define_mode_iterator VSX_DF [V2DF DF])
49 ;; Iterator for vector floating point types supported by VSX
50 (define_mode_iterator VSX_F [V4SF V2DF])
52 ;; Iterator for logical types supported by VSX
53 (define_mode_iterator VSX_L [V16QI
61 (KF "FLOAT128_VECTOR_P (KFmode)")
62 (TF "FLOAT128_VECTOR_P (TFmode)")])
64 ;; Iterator for memory moves.
65 (define_mode_iterator VSX_M [V16QI
72 (KF "FLOAT128_VECTOR_P (KFmode)")
73 (TF "FLOAT128_VECTOR_P (TFmode)")
76 (define_mode_attr VSX_XXBR [(V8HI "h")
83 ;; Map into the appropriate load/store name based on the type
84 (define_mode_attr VSm [(V16QI "vw4")
96 ;; Map the register class used
97 (define_mode_attr VSr [(V16QI "v")
111 ;; What value we need in the "isa" field, to make the IEEE QP float work.
112 (define_mode_attr VSisa [(V16QI "*")
126 ;; A mode attribute to disparage use of GPR registers, except for scalar integer modes.
128 (define_mode_attr ??r [(V16QI "??r")
139 ;; A mode attribute used for 128-bit constant values.
140 (define_mode_attr nW [(V16QI "W")
151 ;; Same size integer type for floating point data
152 (define_mode_attr VSi [(V4SF "v4si")
156 (define_mode_attr VSI [(V4SF "V4SI")
160 ;; Word size for same size conversion
161 (define_mode_attr VSc [(V4SF "w")
165 ;; Map into either s or v, depending on whether this is a scalar or vector
167 (define_mode_attr VSv [(V16QI "v")
177 ;; Appropriate type for add ops (and other simple FP ops)
178 (define_mode_attr VStype_simple [(V2DF "vecdouble")
182 ;; Appropriate type for multiply ops
183 (define_mode_attr VStype_mul [(V2DF "vecdouble")
187 ;; Appropriate type for divide ops.
188 (define_mode_attr VStype_div [(V2DF "vecdiv")
192 ;; Map the scalar mode for a vector type
193 (define_mode_attr VS_scalar [(V1TI "TI")
201 ;; Map to a double-sized vector mode
202 (define_mode_attr VS_double [(V4SI "V8SI")
208 ;; Iterators for loading constants with xxspltib
209 (define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
210 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
212 ;; Vector reverse byte modes
213 (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
215 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
216 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
217 ;; done on ISA 2.07 and not just ISA 3.0.
218 (define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
219 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
221 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
225 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
226 ;; insert to validate the operand number.
227 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
228 (V8HI "const_0_to_7_operand")
229 (V4SI "const_0_to_3_operand")])
231 ;; Mode attribute to give the constraint for vector extract and insert
233 (define_mode_attr VSX_EX [(V16QI "v")
237 ;; Mode iterator for binary floating types other than double to
238 ;; optimize converting to that floating point type from an extract
239 ;; of an integer type
240 (define_mode_iterator VSX_EXTRACT_FL [SF
241 (IF "FLOAT128_2REG_P (IFmode)")
242 (KF "TARGET_FLOAT128_HW")
243 (TF "FLOAT128_2REG_P (TFmode)
244 || (FLOAT128_IEEE_P (TFmode)
245 && TARGET_FLOAT128_HW)")])
247 ;; Mode iterator for binary floating types that have a direct conversion
248 ;; from 64-bit integer to floating point
249 (define_mode_iterator FL_CONV [SF
251 (KF "TARGET_FLOAT128_HW")
252 (TF "TARGET_FLOAT128_HW
253 && FLOAT128_IEEE_P (TFmode)")])
255 ;; Iterator for the 2 short vector types to do a splat from an integer
256 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
258 ;; Mode attribute to give the count for the splat instruction to splat
259 ;; the value in the 64-bit integer slot
260 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
262 ;; Mode attribute to give the suffix for the splat instruction
263 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
265 ;; Constants for creating unspecs
266 (define_c_enum "unspec"
283 UNSPEC_VSX_UNS_FLOAT2
285 UNSPEC_VSX_UNS_FLOATE
287 UNSPEC_VSX_UNS_FLOATO
303 UNSPEC_VSX_XVCVDPSXDS
305 UNSPEC_VSX_XVCVDPUXDS
306 UNSPEC_VSX_SIGN_EXTEND
307 UNSPEC_VSX_XVCVSPSXWS
308 UNSPEC_VSX_XVCVSPSXDS
318 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
319 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
351 UNSPEC_VSX_FIRST_MATCH_INDEX
352 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
353 UNSPEC_VSX_FIRST_MISMATCH_INDEX
354 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
359 ;; The patterns for LE permuted loads and stores come before the general
360 ;; VSX moves so they match first.
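;; Illustrative sketch only (not an additional pattern): without ISA 3.0, a
;; little-endian V2DF/V2DI load through this pattern is expected to expand to
;; a doubleword load plus a doubleword swap, roughly
;;	lxvd2x   vs0,0,r3	; elements arrive doubleword-swapped
;;	xxpermdi vs0,vs0,vs0,2	; swap the doublewords back into element order
;; unless the split below can instead use lvx on a 16-byte-aligned address and
;; avoid the swap entirely.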
361 (define_insn_and_split "*vsx_le_perm_load_<mode>"
362 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
363 (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
364 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
370 (parallel [(const_int 1) (const_int 0)])))
374 (parallel [(const_int 1) (const_int 0)])))]
376 rtx mem = operands[1];
378 /* Don't apply the swap optimization if we've already performed register
379 allocation and the hard register destination is not in the altivec range.  */
381 if ((MEM_ALIGN (mem) >= 128)
382 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
383 || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
385 rtx mem_address = XEXP (mem, 0);
386 enum machine_mode mode = GET_MODE (mem);
388 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
390 /* Replace the source memory address with masked address. */
391 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
392 emit_insn (lvx_set_expr);
395 else if (rs6000_quadword_masked_address_p (mem_address))
397 /* This rtl is already in the form that matches the lvx
398 instruction, so leave it alone. */
401 /* Otherwise, fall through to transform into a swapping load. */
403 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
406 [(set_attr "type" "vecload")
407 (set_attr "length" "8")])
409 (define_insn_and_split "*vsx_le_perm_load_<mode>"
410 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
411 (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
412 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
418 (parallel [(const_int 2) (const_int 3)
419 (const_int 0) (const_int 1)])))
423 (parallel [(const_int 2) (const_int 3)
424 (const_int 0) (const_int 1)])))]
426 rtx mem = operands[1];
428 /* Don't apply the swap optimization if we've already performed register
429 allocation and the hard register destination is not in the altivec range.  */
431 if ((MEM_ALIGN (mem) >= 128)
432 && (!HARD_REGISTER_P (operands[0])
433 || ALTIVEC_REGNO_P (REGNO(operands[0]))))
435 rtx mem_address = XEXP (mem, 0);
436 enum machine_mode mode = GET_MODE (mem);
438 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
440 /* Replace the source memory address with masked address. */
441 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
442 emit_insn (lvx_set_expr);
445 else if (rs6000_quadword_masked_address_p (mem_address))
447 /* This rtl is already in the form that matches the lvx
448 instruction, so leave it alone. */
451 /* Otherwise, fall through to transform into a swapping load. */
453 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
456 [(set_attr "type" "vecload")
457 (set_attr "length" "8")])
459 (define_insn_and_split "*vsx_le_perm_load_v8hi"
460 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
461 (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
462 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
468 (parallel [(const_int 4) (const_int 5)
469 (const_int 6) (const_int 7)
470 (const_int 0) (const_int 1)
471 (const_int 2) (const_int 3)])))
475 (parallel [(const_int 4) (const_int 5)
476 (const_int 6) (const_int 7)
477 (const_int 0) (const_int 1)
478 (const_int 2) (const_int 3)])))]
480 rtx mem = operands[1];
482 /* Don't apply the swap optimization if we've already performed register
483 allocation and the hard register destination is not in the altivec range.  */
485 if ((MEM_ALIGN (mem) >= 128)
486 && (!HARD_REGISTER_P (operands[0])
487 || ALTIVEC_REGNO_P (REGNO(operands[0]))))
489 rtx mem_address = XEXP (mem, 0);
490 enum machine_mode mode = GET_MODE (mem);
492 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
494 /* Replace the source memory address with masked address. */
495 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
496 emit_insn (lvx_set_expr);
499 else if (rs6000_quadword_masked_address_p (mem_address))
501 /* This rtl is already in the form that matches the lvx
502 instruction, so leave it alone. */
505 /* Otherwise, fall through to transform into a swapping load. */
507 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
510 [(set_attr "type" "vecload")
511 (set_attr "length" "8")])
513 (define_insn_and_split "*vsx_le_perm_load_v16qi"
514 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
515 (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
516 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
522 (parallel [(const_int 8) (const_int 9)
523 (const_int 10) (const_int 11)
524 (const_int 12) (const_int 13)
525 (const_int 14) (const_int 15)
526 (const_int 0) (const_int 1)
527 (const_int 2) (const_int 3)
528 (const_int 4) (const_int 5)
529 (const_int 6) (const_int 7)])))
533 (parallel [(const_int 8) (const_int 9)
534 (const_int 10) (const_int 11)
535 (const_int 12) (const_int 13)
536 (const_int 14) (const_int 15)
537 (const_int 0) (const_int 1)
538 (const_int 2) (const_int 3)
539 (const_int 4) (const_int 5)
540 (const_int 6) (const_int 7)])))]
542 rtx mem = operands[1];
544 /* Don't apply the swap optimization if we've already performed register
545 allocation and the hard register destination is not in the altivec range.  */
547 if ((MEM_ALIGN (mem) >= 128)
548 && (!HARD_REGISTER_P (operands[0])
549 || ALTIVEC_REGNO_P (REGNO(operands[0]))))
551 rtx mem_address = XEXP (mem, 0);
552 enum machine_mode mode = GET_MODE (mem);
554 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
556 /* Replace the source memory address with masked address. */
557 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
558 emit_insn (lvx_set_expr);
561 else if (rs6000_quadword_masked_address_p (mem_address))
563 /* This rtl is already in the form that matches the lvx
564 instruction, so leave it alone. */
567 /* Otherwise, fall through to transform into a swapping load. */
569 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
572 [(set_attr "type" "vecload")
573 (set_attr "length" "8")])
575 (define_insn "*vsx_le_perm_store_<mode>"
576 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
577 (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
578 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
580 [(set_attr "type" "vecstore")
581 (set_attr "length" "12")])
584 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
585 (match_operand:VSX_D 1 "vsx_register_operand"))]
586 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
590 (parallel [(const_int 1) (const_int 0)])))
594 (parallel [(const_int 1) (const_int 0)])))]
596 rtx mem = operands[0];
598 /* Don't apply the swap optimization if we've already performed register
599 allocation and the hard register source is not in the altivec range. */
600 if ((MEM_ALIGN (mem) >= 128)
601 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
602 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
604 rtx mem_address = XEXP (mem, 0);
605 enum machine_mode mode = GET_MODE (mem);
606 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
608 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
609 emit_insn (stvx_set_expr);
612 else if (rs6000_quadword_masked_address_p (mem_address))
614 /* This rtl is already in the form that matches the stvx instruction,
615 so leave it alone. */
618 /* Otherwise, fall through to transform into a swapping store. */
621 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
625 ;; The post-reload split requires that we re-permute the source
626 ;; register in case it is still live.
628 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
629 (match_operand:VSX_D 1 "vsx_register_operand"))]
630 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
634 (parallel [(const_int 1) (const_int 0)])))
638 (parallel [(const_int 1) (const_int 0)])))
642 (parallel [(const_int 1) (const_int 0)])))]
645 (define_insn "*vsx_le_perm_store_<mode>"
646 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
647 (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
648 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
650 [(set_attr "type" "vecstore")
651 (set_attr "length" "12")])
654 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
655 (match_operand:VSX_W 1 "vsx_register_operand"))]
656 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
660 (parallel [(const_int 2) (const_int 3)
661 (const_int 0) (const_int 1)])))
665 (parallel [(const_int 2) (const_int 3)
666 (const_int 0) (const_int 1)])))]
668 rtx mem = operands[0];
670 /* Don't apply the swap optimization if we've already performed register
671 allocation and the hard register source is not in the altivec range. */
672 if ((MEM_ALIGN (mem) >= 128)
673 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
674 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
676 rtx mem_address = XEXP (mem, 0);
677 enum machine_mode mode = GET_MODE (mem);
678 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
680 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
681 emit_insn (stvx_set_expr);
684 else if (rs6000_quadword_masked_address_p (mem_address))
686 /* This rtl is already in the form that matches the stvx instruction,
687 so leave it alone. */
690 /* Otherwise, fall through to transform into a swapping store. */
693 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
697 ;; The post-reload split requires that we re-permute the source
698 ;; register in case it is still live.
700 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
701 (match_operand:VSX_W 1 "vsx_register_operand"))]
702 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
706 (parallel [(const_int 2) (const_int 3)
707 (const_int 0) (const_int 1)])))
711 (parallel [(const_int 2) (const_int 3)
712 (const_int 0) (const_int 1)])))
716 (parallel [(const_int 2) (const_int 3)
717 (const_int 0) (const_int 1)])))]
720 (define_insn "*vsx_le_perm_store_v8hi"
721 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
722 (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
723 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
725 [(set_attr "type" "vecstore")
726 (set_attr "length" "12")])
729 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
730 (match_operand:V8HI 1 "vsx_register_operand"))]
731 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
735 (parallel [(const_int 4) (const_int 5)
736 (const_int 6) (const_int 7)
737 (const_int 0) (const_int 1)
738 (const_int 2) (const_int 3)])))
742 (parallel [(const_int 4) (const_int 5)
743 (const_int 6) (const_int 7)
744 (const_int 0) (const_int 1)
745 (const_int 2) (const_int 3)])))]
747 rtx mem = operands[0];
749 /* Don't apply the swap optimization if we've already performed register
750 allocation and the hard register source is not in the altivec range. */
751 if ((MEM_ALIGN (mem) >= 128)
752 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
753 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
755 rtx mem_address = XEXP (mem, 0);
756 enum machine_mode mode = GET_MODE (mem);
757 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
759 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
760 emit_insn (stvx_set_expr);
763 else if (rs6000_quadword_masked_address_p (mem_address))
765 /* This rtl is already in the form that matches the stvx instruction,
766 so leave it alone. */
769 /* Otherwise, fall through to transform into a swapping store. */
772 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
776 ;; The post-reload split requires that we re-permute the source
777 ;; register in case it is still live.
779 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
780 (match_operand:V8HI 1 "vsx_register_operand"))]
781 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
785 (parallel [(const_int 4) (const_int 5)
786 (const_int 6) (const_int 7)
787 (const_int 0) (const_int 1)
788 (const_int 2) (const_int 3)])))
792 (parallel [(const_int 4) (const_int 5)
793 (const_int 6) (const_int 7)
794 (const_int 0) (const_int 1)
795 (const_int 2) (const_int 3)])))
799 (parallel [(const_int 4) (const_int 5)
800 (const_int 6) (const_int 7)
801 (const_int 0) (const_int 1)
802 (const_int 2) (const_int 3)])))]
805 (define_insn "*vsx_le_perm_store_v16qi"
806 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
807 (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
808 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
810 [(set_attr "type" "vecstore")
811 (set_attr "length" "12")])
814 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
815 (match_operand:V16QI 1 "vsx_register_operand"))]
816 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
820 (parallel [(const_int 8) (const_int 9)
821 (const_int 10) (const_int 11)
822 (const_int 12) (const_int 13)
823 (const_int 14) (const_int 15)
824 (const_int 0) (const_int 1)
825 (const_int 2) (const_int 3)
826 (const_int 4) (const_int 5)
827 (const_int 6) (const_int 7)])))
831 (parallel [(const_int 8) (const_int 9)
832 (const_int 10) (const_int 11)
833 (const_int 12) (const_int 13)
834 (const_int 14) (const_int 15)
835 (const_int 0) (const_int 1)
836 (const_int 2) (const_int 3)
837 (const_int 4) (const_int 5)
838 (const_int 6) (const_int 7)])))]
840 rtx mem = operands[0];
842 /* Don't apply the swap optimization if we've already performed register
843 allocation and the hard register source is not in the altivec range. */
844 if ((MEM_ALIGN (mem) >= 128)
845 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
846 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
848 rtx mem_address = XEXP (mem, 0);
849 enum machine_mode mode = GET_MODE (mem);
850 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
852 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
853 emit_insn (stvx_set_expr);
856 else if (rs6000_quadword_masked_address_p (mem_address))
858 /* This rtl is already in the form that matches the stvx instruction,
859 so leave it alone. */
862 /* Otherwise, fall through to transform into a swapping store. */
865 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
869 ;; The post-reload split requires that we re-permute the source
870 ;; register in case it is still live.
872 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
873 (match_operand:V16QI 1 "vsx_register_operand"))]
874 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
878 (parallel [(const_int 8) (const_int 9)
879 (const_int 10) (const_int 11)
880 (const_int 12) (const_int 13)
881 (const_int 14) (const_int 15)
882 (const_int 0) (const_int 1)
883 (const_int 2) (const_int 3)
884 (const_int 4) (const_int 5)
885 (const_int 6) (const_int 7)])))
889 (parallel [(const_int 8) (const_int 9)
890 (const_int 10) (const_int 11)
891 (const_int 12) (const_int 13)
892 (const_int 14) (const_int 15)
893 (const_int 0) (const_int 1)
894 (const_int 2) (const_int 3)
895 (const_int 4) (const_int 5)
896 (const_int 6) (const_int 7)])))
900 (parallel [(const_int 8) (const_int 9)
901 (const_int 10) (const_int 11)
902 (const_int 12) (const_int 13)
903 (const_int 14) (const_int 15)
904 (const_int 0) (const_int 1)
905 (const_int 2) (const_int 3)
906 (const_int 4) (const_int 5)
907 (const_int 6) (const_int 7)])))]
910 ;; Little endian word swapping for 128-bit types that are either scalars or the
911 ;; special V1TI container class, for which it is not appropriate to use vec_select.
913 (define_insn "*vsx_le_permute_<mode>"
914 [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
916 (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
918 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
920 xxpermdi %x0,%x1,%x1,2
924 ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
925 std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
926 [(set_attr "length" "*,*,*,8,8,8")
927 (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
929 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
930 [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
933 (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
936 "!BYTES_BIG_ENDIAN && TARGET_VSX"
941 [(set (match_dup 0) (match_dup 1))]
943 if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
945 emit_note (NOTE_INSN_DELETED);
949 [(set_attr "length" "0,4")
950 (set_attr "type" "veclogical")])
952 (define_insn_and_split "*vsx_le_perm_load_<mode>"
953 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
954 (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
955 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
959 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
962 rtx tmp = (can_create_pseudo_p ()
963 ? gen_reg_rtx_and_attrs (operands[0])
965 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
966 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
969 [(set_attr "type" "vecload,load")
970 (set_attr "length" "8,8")
971 (set_attr "isa" "<VSisa>,*")])
973 (define_insn "*vsx_le_perm_store_<mode>"
974 [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
975 (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
976 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
980 [(set_attr "type" "vecstore,store")
981 (set_attr "length" "12,8")
982 (set_attr "isa" "<VSisa>,*")])
985 [(set (match_operand:VSX_LE_128 0 "memory_operand")
986 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
987 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
990 rtx tmp = (can_create_pseudo_p ()
991 ? gen_reg_rtx_and_attrs (operands[0])
993 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
994 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
998 ;; Peepholes to catch loads and stores for TImode if TImode landed in
999 ;; GPR registers on a little endian system.
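;; Illustrative sketch of the first peephole below (assumed RTL, exposition
;; only): a TImode load expressed as a rotate of the memory value followed by
;; a rotate back again, i.e. roughly
;;	(set (reg:TI 3) (rotate:TI (mem:TI ...) (const_int 64)))
;;	(set (reg:TI 5) (rotate:TI (reg:TI 3) (const_int 64)))
;; collapses into a single direct move when the intermediate register is dead
;; or both destinations are the same register.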
1001 [(set (match_operand:VSX_TI 0 "int_reg_operand")
1002 (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
1004 (set (match_operand:VSX_TI 2 "int_reg_operand")
1005 (rotate:VSX_TI (match_dup 0)
1007 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1008 && (rtx_equal_p (operands[0], operands[2])
1009 || peep2_reg_dead_p (2, operands[0]))"
1010 [(set (match_dup 2) (match_dup 1))])
1013 [(set (match_operand:VSX_TI 0 "int_reg_operand")
1014 (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
1016 (set (match_operand:VSX_TI 2 "memory_operand")
1017 (rotate:VSX_TI (match_dup 0)
1019 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1020 && peep2_reg_dead_p (2, operands[0])"
1021 [(set (match_dup 2) (match_dup 1))])
1023 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
1024 ;; VSX registers on a little endian system. The vector types and IEEE 128-bit
1025 ;; floating point are handled by the more generic swap elimination pass.
1027 [(set (match_operand:TI 0 "vsx_register_operand")
1028 (rotate:TI (match_operand:TI 1 "vsx_register_operand")
1030 (set (match_operand:TI 2 "vsx_register_operand")
1031 (rotate:TI (match_dup 0)
1033 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1034 && (rtx_equal_p (operands[0], operands[2])
1035 || peep2_reg_dead_p (2, operands[0]))"
1036 [(set (match_dup 2) (match_dup 1))])
1038 ;; The post-reload split requires that we re-permute the source
1039 ;; register in case it is still live.
1041 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1042 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1043 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
1046 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1047 rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
1048 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1052 ;; Vector constants that can be generated with XXSPLTIB, which was added in ISA
1053 ;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
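;; Illustrative example (assumed code generation, not part of the patterns
;; below): a byte splat such as
;;	vector signed char v = { -5, -5, -5, -5, -5, -5, -5, -5,
;;				 -5, -5, -5, -5, -5, -5, -5, -5 };
;; is expected to become a single
;;	xxspltib vs34,251	; 251 == (-5) & 0xff
;; rather than a load from the constant pool.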
1054 (define_insn "xxspltib_v16qi"
1055 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1056 (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
1059 operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
1060 return "xxspltib %x0,%2";
1062 [(set_attr "type" "vecperm")])
1064 (define_insn "xxspltib_<mode>_nosplit"
1065 [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
1066 (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
1069 rtx op1 = operands[1];
1073 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1077 operands[2] = GEN_INT (value & 0xff);
1078 return "xxspltib %x0,%2";
1080 [(set_attr "type" "vecperm")])
1082 (define_insn_and_split "*xxspltib_<mode>_split"
1083 [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
1084 (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
1092 rtx op0 = operands[0];
1093 rtx op1 = operands[1];
1094 rtx tmp = ((can_create_pseudo_p ())
1095 ? gen_reg_rtx (V16QImode)
1096 : gen_lowpart (V16QImode, op0));
1098 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1102 emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
1104 if (<MODE>mode == V2DImode)
1105 emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
1107 else if (<MODE>mode == V4SImode)
1108 emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
1110 else if (<MODE>mode == V8HImode)
1111 emit_insn (gen_altivec_vupkhsb (op0, tmp));
1118 [(set_attr "type" "vecperm")
1119 (set_attr "length" "8")])
1122 ;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTIB
1123 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
1124 ;; all 1's, since the machine does not have to wait for the previous
1125 ;; instruction using the register being set (such as a store waiting on a slow
1126 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
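;; For example (assumed output, illustration only): clearing or setting a
;; vector register would use
;;	xxspltib vs0,0		; all zero bits
;;	xxspltib vs0,255	; all one bits
;; instead of xxlxor vs0,vs0,vs0 or xxlorc vs0,vs0,vs0, which read the old
;; contents of vs0 and so must wait for the instruction that last set it.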
1128 ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
1129 ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
1130 ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
1131 (define_insn "vsx_mov<mode>_64bit"
1132 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1133 "=ZwO, wa, wa, r, we, ?wQ,
1134 ?&r, ??r, ??Y, <??r>, wa, v,
1135 ?wa, v, <??r>, wZ, v")
1137 (match_operand:VSX_M 1 "input_operand"
1138 "wa, ZwO, wa, we, r, r,
1139 wQ, Y, r, r, wE, jwM,
1140 ?jwM, W, <nW>, v, wZ"))]
1142 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1143 && (register_operand (operands[0], <MODE>mode)
1144 || register_operand (operands[1], <MODE>mode))"
1146 return rs6000_output_move_128bit (operands);
1149 "vecstore, vecload, vecsimple, mffgpr, mftgpr, load,
1150 store, load, store, *, vecsimple, vecsimple,
1151 vecsimple, *, *, vecstore, vecload")
1157 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1159 <VSisa>, *, *, *, *")])
1161 ;; VSX store VSX load VSX move GPR load GPR store GPR move
1162 ;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const
1163 ;; LVX (VMX) STVX (VMX)
1164 (define_insn "*vsx_mov<mode>_32bit"
1165 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1166 "=ZwO, wa, wa, ??r, ??Y, <??r>,
1167 wa, v, ?wa, v, <??r>,
1170 (match_operand:VSX_M 1 "input_operand"
1171 "wa, ZwO, wa, Y, r, r,
1172 wE, jwM, ?jwM, W, <nW>,
1175 "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1176 && (register_operand (operands[0], <MODE>mode)
1177 || register_operand (operands[1], <MODE>mode))"
1179 return rs6000_output_move_128bit (operands);
1182 "vecstore, vecload, vecsimple, load, store, *,
1183 vecsimple, vecsimple, vecsimple, *, *,
1186 "*, *, *, 16, 16, 16,
1190 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1191 p9v, *, <VSisa>, *, *,
1194 ;; Explicit load/store expanders for the builtin functions
1195 (define_expand "vsx_load_<mode>"
1196 [(set (match_operand:VSX_M 0 "vsx_register_operand")
1197 (match_operand:VSX_M 1 "memory_operand"))]
1198 "VECTOR_MEM_VSX_P (<MODE>mode)"
1200 /* Expand to swaps if needed, prior to swap optimization. */
1201 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1203 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1208 (define_expand "vsx_store_<mode>"
1209 [(set (match_operand:VSX_M 0 "memory_operand")
1210 (match_operand:VSX_M 1 "vsx_register_operand"))]
1211 "VECTOR_MEM_VSX_P (<MODE>mode)"
1213 /* Expand to swaps if needed, prior to swap optimization. */
1214 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1216 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1221 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1222 ;; when you really want their element-reversing behavior.
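;; For example (illustration only): on little endian the element-reversing
;; V2DI load is simply
;;	lxvd2x vs0,0,r3
;; with no permute, because lxvd2x already delivers the doublewords in
;; big-endian element order, which is exactly the reversed order wanted here.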
1223 (define_insn "vsx_ld_elemrev_v2di"
1224 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1226 (match_operand:V2DI 1 "memory_operand" "Z")
1227 (parallel [(const_int 1) (const_int 0)])))]
1228 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1230 [(set_attr "type" "vecload")])
1232 (define_insn "vsx_ld_elemrev_v1ti"
1233 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
1235 (match_operand:V1TI 1 "memory_operand" "Z")
1236 (parallel [(const_int 0)])))]
1237 "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
1239 return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
1241 [(set_attr "type" "vecload")])
1243 (define_insn "vsx_ld_elemrev_v2df"
1244 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1246 (match_operand:V2DF 1 "memory_operand" "Z")
1247 (parallel [(const_int 1) (const_int 0)])))]
1248 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1250 [(set_attr "type" "vecload")])
1252 (define_insn "vsx_ld_elemrev_v4si"
1253 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1255 (match_operand:V4SI 1 "memory_operand" "Z")
1256 (parallel [(const_int 3) (const_int 2)
1257 (const_int 1) (const_int 0)])))]
1258 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1260 [(set_attr "type" "vecload")])
1262 (define_insn "vsx_ld_elemrev_v4sf"
1263 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1265 (match_operand:V4SF 1 "memory_operand" "Z")
1266 (parallel [(const_int 3) (const_int 2)
1267 (const_int 1) (const_int 0)])))]
1268 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1270 [(set_attr "type" "vecload")])
1272 (define_expand "vsx_ld_elemrev_v8hi"
1273 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1275 (match_operand:V8HI 1 "memory_operand" "Z")
1276 (parallel [(const_int 7) (const_int 6)
1277 (const_int 5) (const_int 4)
1278 (const_int 3) (const_int 2)
1279 (const_int 1) (const_int 0)])))]
1280 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1282 if (!TARGET_P9_VECTOR)
1284 rtx tmp = gen_reg_rtx (V4SImode);
1285 rtx subreg, subreg2, perm[16], pcv;
1286 /* 2 is leftmost element in register */
1287 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1290 subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1291 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1292 subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1294 for (i = 0; i < 16; ++i)
1295 perm[i] = GEN_INT (reorder[i]);
1297 pcv = force_reg (V16QImode,
1298 gen_rtx_CONST_VECTOR (V16QImode,
1299 gen_rtvec_v (16, perm)));
1300 emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
1306 (define_insn "*vsx_ld_elemrev_v8hi_internal"
1307 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1309 (match_operand:V8HI 1 "memory_operand" "Z")
1310 (parallel [(const_int 7) (const_int 6)
1311 (const_int 5) (const_int 4)
1312 (const_int 3) (const_int 2)
1313 (const_int 1) (const_int 0)])))]
1314 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1316 [(set_attr "type" "vecload")])
1318 (define_expand "vsx_ld_elemrev_v16qi"
1319 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1321 (match_operand:V16QI 1 "memory_operand" "Z")
1322 (parallel [(const_int 15) (const_int 14)
1323 (const_int 13) (const_int 12)
1324 (const_int 11) (const_int 10)
1325 (const_int 9) (const_int 8)
1326 (const_int 7) (const_int 6)
1327 (const_int 5) (const_int 4)
1328 (const_int 3) (const_int 2)
1329 (const_int 1) (const_int 0)])))]
1330 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1332 if (!TARGET_P9_VECTOR)
1334 rtx tmp = gen_reg_rtx (V4SImode);
1335 rtx subreg, subreg2, perm[16], pcv;
1336 /* 3 is leftmost element in register */
1337 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1340 subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1341 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1342 subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1344 for (i = 0; i < 16; ++i)
1345 perm[i] = GEN_INT (reorder[i]);
1347 pcv = force_reg (V16QImode,
1348 gen_rtx_CONST_VECTOR (V16QImode,
1349 gen_rtvec_v (16, perm)));
1350 emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1356 (define_insn "vsx_ld_elemrev_v16qi_internal"
1357 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1359 (match_operand:V16QI 1 "memory_operand" "Z")
1360 (parallel [(const_int 15) (const_int 14)
1361 (const_int 13) (const_int 12)
1362 (const_int 11) (const_int 10)
1363 (const_int 9) (const_int 8)
1364 (const_int 7) (const_int 6)
1365 (const_int 5) (const_int 4)
1366 (const_int 3) (const_int 2)
1367 (const_int 1) (const_int 0)])))]
1368 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1370 [(set_attr "type" "vecload")])
1372 (define_insn "vsx_st_elemrev_v1ti"
1373 [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1375 (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1376 (parallel [(const_int 0)])))
1377 (clobber (match_dup 1))]
1378 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1380 return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1382 [(set_attr "type" "vecstore")])
1384 (define_insn "vsx_st_elemrev_v2df"
1385 [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1387 (match_operand:V2DF 1 "vsx_register_operand" "wa")
1388 (parallel [(const_int 1) (const_int 0)])))]
1389 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1391 [(set_attr "type" "vecstore")])
1393 (define_insn "vsx_st_elemrev_v2di"
1394 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1396 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1397 (parallel [(const_int 1) (const_int 0)])))]
1398 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1400 [(set_attr "type" "vecstore")])
1402 (define_insn "vsx_st_elemrev_v4sf"
1403 [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1405 (match_operand:V4SF 1 "vsx_register_operand" "wa")
1406 (parallel [(const_int 3) (const_int 2)
1407 (const_int 1) (const_int 0)])))]
1408 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1410 [(set_attr "type" "vecstore")])
1412 (define_insn "vsx_st_elemrev_v4si"
1413 [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1415 (match_operand:V4SI 1 "vsx_register_operand" "wa")
1416 (parallel [(const_int 3) (const_int 2)
1417 (const_int 1) (const_int 0)])))]
1418 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1420 [(set_attr "type" "vecstore")])
1422 (define_expand "vsx_st_elemrev_v8hi"
1423 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1425 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1426 (parallel [(const_int 7) (const_int 6)
1427 (const_int 5) (const_int 4)
1428 (const_int 3) (const_int 2)
1429 (const_int 1) (const_int 0)])))]
1430 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1432 if (!TARGET_P9_VECTOR)
1434 rtx mem_subreg, subreg, perm[16], pcv;
1435 rtx tmp = gen_reg_rtx (V8HImode);
1436 /* 2 is leftmost element in register */
1437 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1440 for (i = 0; i < 16; ++i)
1441 perm[i] = GEN_INT (reorder[i]);
1443 pcv = force_reg (V16QImode,
1444 gen_rtx_CONST_VECTOR (V16QImode,
1445 gen_rtvec_v (16, perm)));
1446 emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1448 subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1449 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1450 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1455 (define_insn "*vsx_st_elemrev_v2di_internal"
1456 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1458 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1459 (parallel [(const_int 1) (const_int 0)])))]
1460 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1462 [(set_attr "type" "vecstore")])
1464 (define_insn "*vsx_st_elemrev_v8hi_internal"
1465 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1467 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1468 (parallel [(const_int 7) (const_int 6)
1469 (const_int 5) (const_int 4)
1470 (const_int 3) (const_int 2)
1471 (const_int 1) (const_int 0)])))]
1472 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1474 [(set_attr "type" "vecstore")])
1476 (define_expand "vsx_st_elemrev_v16qi"
1477 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1479 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1480 (parallel [(const_int 15) (const_int 14)
1481 (const_int 13) (const_int 12)
1482 (const_int 11) (const_int 10)
1483 (const_int 9) (const_int 8)
1484 (const_int 7) (const_int 6)
1485 (const_int 5) (const_int 4)
1486 (const_int 3) (const_int 2)
1487 (const_int 1) (const_int 0)])))]
1488 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1490 if (!TARGET_P9_VECTOR)
1492 rtx mem_subreg, subreg, perm[16], pcv;
1493 rtx tmp = gen_reg_rtx (V16QImode);
1494 /* 3 is leftmost element in register */
1495 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1498 for (i = 0; i < 16; ++i)
1499 perm[i] = GEN_INT (reorder[i]);
1501 pcv = force_reg (V16QImode,
1502 gen_rtx_CONST_VECTOR (V16QImode,
1503 gen_rtvec_v (16, perm)));
1504 emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1506 subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1507 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1508 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1513 (define_insn "*vsx_st_elemrev_v16qi_internal"
1514 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1516 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1517 (parallel [(const_int 15) (const_int 14)
1518 (const_int 13) (const_int 12)
1519 (const_int 11) (const_int 10)
1520 (const_int 9) (const_int 8)
1521 (const_int 7) (const_int 6)
1522 (const_int 5) (const_int 4)
1523 (const_int 3) (const_int 2)
1524 (const_int 1) (const_int 0)])))]
1525 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1527 [(set_attr "type" "vecstore")])
1530 ;; VSX vector floating point arithmetic instructions. The VSX scalar
1531 ;; instructions are now combined with the insn for the traditional floating
1533 (define_insn "*vsx_add<mode>3"
1534 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1535 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1536 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1537 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1538 "xvadd<sd>p %x0,%x1,%x2"
1539 [(set_attr "type" "<VStype_simple>")])
1541 (define_insn "*vsx_sub<mode>3"
1542 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1543 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1544 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1545 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1546 "xvsub<sd>p %x0,%x1,%x2"
1547 [(set_attr "type" "<VStype_simple>")])
1549 (define_insn "*vsx_mul<mode>3"
1550 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1551 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1552 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1553 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1554 "xvmul<sd>p %x0,%x1,%x2"
1555 [(set_attr "type" "<VStype_simple>")])
1557 ; Emulate vector with scalar for vec_mul in V2DImode
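;; The split below does the multiply one element at a time in scalar code,
;; roughly equivalent to the hypothetical C (illustration only)
;;	r[0] = a[0] * b[0];
;;	r[1] = a[1] * b[1];
;; using mulld on 64-bit targets (or an expanded DImode multiply on 32-bit),
;; then reassembling the V2DI result with vsx_concat_v2di.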
1558 (define_insn_and_split "vsx_mul_v2di"
1559 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1560 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1561 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1563 "VECTOR_MEM_VSX_P (V2DImode)"
1565 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1568 rtx op0 = operands[0];
1569 rtx op1 = operands[1];
1570 rtx op2 = operands[2];
1571 rtx op3 = gen_reg_rtx (DImode);
1572 rtx op4 = gen_reg_rtx (DImode);
1573 rtx op5 = gen_reg_rtx (DImode);
1574 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1575 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1576 if (TARGET_POWERPC64)
1577 emit_insn (gen_muldi3 (op5, op3, op4));
1580 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1581 emit_move_insn (op5, ret);
1583 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1584 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1585 if (TARGET_POWERPC64)
1586 emit_insn (gen_muldi3 (op3, op3, op4));
1589 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1590 emit_move_insn (op3, ret);
1592 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1595 [(set_attr "type" "mul")])
1597 (define_insn "*vsx_div<mode>3"
1598 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1599 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1600 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1601 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1602 "xvdiv<sd>p %x0,%x1,%x2"
1603 [(set_attr "type" "<VStype_div>")])
1605 ; Emulate vector with scalar for vec_div in V2DImode
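;; As for the multiply above, the divide is emulated one element at a time:
;;	r[0] = a[0] / b[0];
;;	r[1] = a[1] / b[1];
;; using divd on 64-bit targets; on 32-bit targets each element becomes a
;; call to the libgcc routine __divdi3 (__udivdi3 for the unsigned variant
;; below), obtained via optab_libfunc.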
1606 (define_insn_and_split "vsx_div_v2di"
1607 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1608 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1609 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1611 "VECTOR_MEM_VSX_P (V2DImode)"
1613 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1616 rtx op0 = operands[0];
1617 rtx op1 = operands[1];
1618 rtx op2 = operands[2];
1619 rtx op3 = gen_reg_rtx (DImode);
1620 rtx op4 = gen_reg_rtx (DImode);
1621 rtx op5 = gen_reg_rtx (DImode);
1622 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1623 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1624 if (TARGET_POWERPC64)
1625 emit_insn (gen_divdi3 (op5, op3, op4));
1628 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1629 rtx target = emit_library_call_value (libfunc,
1630 op5, LCT_NORMAL, DImode,
1633 emit_move_insn (op5, target);
1635 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1636 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1637 if (TARGET_POWERPC64)
1638 emit_insn (gen_divdi3 (op3, op3, op4));
1641 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1642 rtx target = emit_library_call_value (libfunc,
1643 op3, LCT_NORMAL, DImode,
1646 emit_move_insn (op3, target);
1648 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1651 [(set_attr "type" "div")])
1653 (define_insn_and_split "vsx_udiv_v2di"
1654 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1655 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1656 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1658 "VECTOR_MEM_VSX_P (V2DImode)"
1660 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1663 rtx op0 = operands[0];
1664 rtx op1 = operands[1];
1665 rtx op2 = operands[2];
1666 rtx op3 = gen_reg_rtx (DImode);
1667 rtx op4 = gen_reg_rtx (DImode);
1668 rtx op5 = gen_reg_rtx (DImode);
1669 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1670 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1671 if (TARGET_POWERPC64)
1672 emit_insn (gen_udivdi3 (op5, op3, op4));
1675 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1676 rtx target = emit_library_call_value (libfunc,
1677 op5, LCT_NORMAL, DImode,
1680 emit_move_insn (op5, target);
1682 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1683 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1684 if (TARGET_POWERPC64)
1685 emit_insn (gen_udivdi3 (op3, op3, op4));
1688 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1689 rtx target = emit_library_call_value (libfunc,
1690 op3, LCT_NORMAL, DImode,
1693 emit_move_insn (op3, target);
1695 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1698 [(set_attr "type" "div")])
1700 ;; *tdiv* instruction returning the FG flag
1701 (define_expand "vsx_tdiv<mode>3_fg"
1703 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1704 (match_operand:VSX_B 2 "vsx_register_operand")]
1706 (set (match_operand:SI 0 "gpc_reg_operand")
1707 (gt:SI (match_dup 3)
1709 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1711 operands[3] = gen_reg_rtx (CCFPmode);
1714 ;; *tdiv* instruction returning the FE flag
1715 (define_expand "vsx_tdiv<mode>3_fe"
1717 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1718 (match_operand:VSX_B 2 "vsx_register_operand")]
1720 (set (match_operand:SI 0 "gpc_reg_operand")
1721 (eq:SI (match_dup 3)
1723 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1725 operands[3] = gen_reg_rtx (CCFPmode);
1728 (define_insn "*vsx_tdiv<mode>3_internal"
1729 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1730 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
1731 (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
1733 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1734 "x<VSv>tdiv<sd>p %0,%x1,%x2"
1735 [(set_attr "type" "<VStype_simple>")])
1737 (define_insn "vsx_fre<mode>2"
1738 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1739 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1741 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1743 [(set_attr "type" "<VStype_simple>")])
1745 (define_insn "*vsx_neg<mode>2"
1746 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1747 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1748 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1749 "xvneg<sd>p %x0,%x1"
1750 [(set_attr "type" "<VStype_simple>")])
1752 (define_insn "*vsx_abs<mode>2"
1753 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1754 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1755 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1756 "xvabs<sd>p %x0,%x1"
1757 [(set_attr "type" "<VStype_simple>")])
1759 (define_insn "vsx_nabs<mode>2"
1760 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1763 (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
1764 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1765 "xvnabs<sd>p %x0,%x1"
1766 [(set_attr "type" "<VStype_simple>")])
1768 (define_insn "vsx_smax<mode>3"
1769 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1770 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1771 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1772 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1773 "xvmax<sd>p %x0,%x1,%x2"
1774 [(set_attr "type" "<VStype_simple>")])
1776 (define_insn "*vsx_smin<mode>3"
1777 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1778 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1779 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1780 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1781 "xvmin<sd>p %x0,%x1,%x2"
1782 [(set_attr "type" "<VStype_simple>")])
1784 (define_insn "*vsx_sqrt<mode>2"
1785 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1786 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1787 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1788 "xvsqrt<sd>p %x0,%x1"
1789 [(set_attr "type" "<sd>sqrt")])
1791 (define_insn "*vsx_rsqrte<mode>2"
1792 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1793 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1795 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1796 "xvrsqrte<sd>p %x0,%x1"
1797 [(set_attr "type" "<VStype_simple>")])
1799 ;; *tsqrt* returning the fg flag
1800 (define_expand "vsx_tsqrt<mode>2_fg"
1802 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1804 (set (match_operand:SI 0 "gpc_reg_operand")
1805 (gt:SI (match_dup 2)
1807 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1809 operands[2] = gen_reg_rtx (CCFPmode);
1812 ;; *tsqrt* returning the fe flag
1813 (define_expand "vsx_tsqrt<mode>2_fe"
1815 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1817 (set (match_operand:SI 0 "gpc_reg_operand")
1818 (eq:SI (match_dup 2)
1820 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1822 operands[2] = gen_reg_rtx (CCFPmode);
1825 (define_insn "*vsx_tsqrt<mode>2_internal"
1826 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1827 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
1829 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1830 "x<VSv>tsqrt<sd>p %0,%x1"
1831 [(set_attr "type" "<VStype_simple>")])
1833 ;; Fused vector multiply/add instructions. Support the classical Altivec
1834 ;; versions of fma, which allow the target to be a separate register from the
1835 ;; 3 inputs. Under VSX, the target must be either the addend or the first multiplicand; see the example below.
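;; For example (illustration only), for x = fma (a, b, c) on V4SF:
;;	xvmaddasp	when x is allocated to the same register as the addend c
;;	xvmaddmsp	when x is allocated to the same register as a multiplicand
;;	vmaddfp		otherwise, when everything lives in Altivec registers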
1838 (define_insn "*vsx_fmav4sf4"
1839 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1841 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1842 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1843 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))]
1844 "VECTOR_UNIT_VSX_P (V4SFmode)"
1846 xvmaddasp %x0,%x1,%x2
1847 xvmaddmsp %x0,%x1,%x3
1848 vmaddfp %0,%1,%2,%3"
1849 [(set_attr "type" "vecfloat")])
1851 (define_insn "*vsx_fmav2df4"
1852 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1854 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1855 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1856 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
1857 "VECTOR_UNIT_VSX_P (V2DFmode)"
1859 xvmaddadp %x0,%x1,%x2
1860 xvmaddmdp %x0,%x1,%x3"
1861 [(set_attr "type" "vecdouble")])
1863 (define_insn "*vsx_fms<mode>4"
1864 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1866 (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
1867 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1869 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1870 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1872 xvmsuba<sd>p %x0,%x1,%x2
1873 xvmsubm<sd>p %x0,%x1,%x3"
1874 [(set_attr "type" "<VStype_mul>")])
1876 (define_insn "*vsx_nfma<mode>4"
1877 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1880 (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
1881 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1882 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1883 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1885 xvnmadda<sd>p %x0,%x1,%x2
1886 xvnmaddm<sd>p %x0,%x1,%x3"
1887 [(set_attr "type" "<VStype_mul>")])
1889 (define_insn "*vsx_nfmsv4sf4"
1890 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1893 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1894 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1896 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))]
1897 "VECTOR_UNIT_VSX_P (V4SFmode)"
1899 xvnmsubasp %x0,%x1,%x2
1900 xvnmsubmsp %x0,%x1,%x3
1901 vnmsubfp %0,%1,%2,%3"
1902 [(set_attr "type" "vecfloat")])
1904 (define_insn "*vsx_nfmsv2df4"
1905 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1908 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1909 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1911 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
1912 "VECTOR_UNIT_VSX_P (V2DFmode)"
1914 xvnmsubadp %x0,%x1,%x2
1915 xvnmsubmdp %x0,%x1,%x3"
1916 [(set_attr "type" "vecdouble")])
1918 ;; Vector conditional expressions (no scalar version for these instructions)
1919 (define_insn "vsx_eq<mode>"
1920 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1921 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1922 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1923 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1924 "xvcmpeq<sd>p %x0,%x1,%x2"
1925 [(set_attr "type" "<VStype_simple>")])
1927 (define_insn "vsx_gt<mode>"
1928 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1929 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1930 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1931 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1932 "xvcmpgt<sd>p %x0,%x1,%x2"
1933 [(set_attr "type" "<VStype_simple>")])
1935 (define_insn "*vsx_ge<mode>"
1936 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1937 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1938 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1939 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1940 "xvcmpge<sd>p %x0,%x1,%x2"
1941 [(set_attr "type" "<VStype_simple>")])
1943 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
1944 ;; indicate a combined status
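;; For example (illustration only), a predicate such as vec_all_eq on V2DF is
;; expected to use the record form
;;	xvcmpeqdp. vs0,vs1,vs2
;; which writes the per-element mask to vs0 and also sets CR6, so a following
;; branch can test whether all (or none) of the elements compared equal.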
1945 (define_insn "*vsx_eq_<mode>_p"
1946 [(set (reg:CC CR6_REGNO)
1948 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1949 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1951 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1952 (eq:VSX_F (match_dup 1)
1954 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1955 "xvcmpeq<sd>p. %x0,%x1,%x2"
1956 [(set_attr "type" "<VStype_simple>")])
1958 (define_insn "*vsx_gt_<mode>_p"
1959 [(set (reg:CC CR6_REGNO)
1961 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1962 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1964 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1965 (gt:VSX_F (match_dup 1)
1967 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1968 "xvcmpgt<sd>p. %x0,%x1,%x2"
1969 [(set_attr "type" "<VStype_simple>")])
1971 (define_insn "*vsx_ge_<mode>_p"
1972 [(set (reg:CC CR6_REGNO)
1974 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1975 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1977 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1978 (ge:VSX_F (match_dup 1)
1980 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1981 "xvcmpge<sd>p. %x0,%x1,%x2"
1982 [(set_attr "type" "<VStype_simple>")])
1985 (define_insn "*vsx_xxsel<mode>"
1986 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
1988 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
1989 (match_operand:VSX_L 4 "zero_constant" ""))
1990 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
1991 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
1992 "VECTOR_MEM_VSX_P (<MODE>mode)"
1993 "xxsel %x0,%x3,%x2,%x1"
1994 [(set_attr "type" "vecmove")
1995 (set_attr "isa" "<VSisa>")])
1997 (define_insn "*vsx_xxsel<mode>_uns"
1998 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2000 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2001 (match_operand:VSX_L 4 "zero_constant" ""))
2002 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2003 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2004 "VECTOR_MEM_VSX_P (<MODE>mode)"
2005 "xxsel %x0,%x3,%x2,%x1"
2006 [(set_attr "type" "vecmove")
2007 (set_attr "isa" "<VSisa>")])
2010 (define_insn "vsx_copysign<mode>3"
2011 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2013 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
2014 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
2016 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2017 "xvcpsgn<sd>p %x0,%x2,%x1"
2018 [(set_attr "type" "<VStype_simple>")])
2020 ;; For the conversions, limit the register class for the integer value to be
2021 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2022 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2023 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2024 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2025 ;; in allowing virtual registers.
2026 (define_insn "vsx_float<VSi><mode>2"
2027 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2028 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2029 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2030 "xvcvsx<VSc><sd>p %x0,%x1"
2031 [(set_attr "type" "<VStype_simple>")])
2033 (define_insn "vsx_floatuns<VSi><mode>2"
2034 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2035 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2036 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2037 "xvcvux<VSc><sd>p %x0,%x1"
2038 [(set_attr "type" "<VStype_simple>")])
2040 (define_insn "vsx_fix_trunc<mode><VSi>2"
2041 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2042 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2043 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2044 "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
2045 [(set_attr "type" "<VStype_simple>")])
2047 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2048 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2049 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2050 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2051 "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
2052 [(set_attr "type" "<VStype_simple>")])
2054 ;; Math rounding functions
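;; As a reading aid (not taken from this file): the trailing letters on the
;; VSX round instructions are believed to select the rounding mode -- "i"
;; rounds to nearest with ties away from zero, "ic" uses the current rounding
;; mode, "iz" rounds toward zero, "im" toward -infinity and "ip" toward
;; +infinity, matching the btrunc, floor and ceil patterns below.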
2055 (define_insn "vsx_x<VSv>r<sd>pi"
2056 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2057 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2058 UNSPEC_VSX_ROUND_I))]
2059 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2060 "x<VSv>r<sd>pi %x0,%x1"
2061 [(set_attr "type" "<VStype_simple>")])
2063 (define_insn "vsx_x<VSv>r<sd>pic"
2064 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2065 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2066 UNSPEC_VSX_ROUND_IC))]
2067 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2068 "x<VSv>r<sd>pic %x0,%x1"
2069 [(set_attr "type" "<VStype_simple>")])
2071 (define_insn "vsx_btrunc<mode>2"
2072 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2073 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2074 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2075 "xvr<sd>piz %x0,%x1"
2076 [(set_attr "type" "<VStype_simple>")])
2078 (define_insn "*vsx_b2trunc<mode>2"
2079 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2080 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2082 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2083 "x<VSv>r<sd>piz %x0,%x1"
2084 [(set_attr "type" "<VStype_simple>")])
2086 (define_insn "vsx_floor<mode>2"
2087 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2088 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2090 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2091 "xvr<sd>pim %x0,%x1"
2092 [(set_attr "type" "<VStype_simple>")])
2094 (define_insn "vsx_ceil<mode>2"
2095 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2096 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2098 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2099 "xvr<sd>pip %x0,%x1"
2100 [(set_attr "type" "<VStype_simple>")])
2103 ;; VSX convert to/from double vector
2105 ;; Convert between single and double precision
2106 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2107 ;; scalar single precision instructions internally use the double format.
2108 ;; Prefer the altivec registers, since we likely will need to do a vperm
2109 (define_insn "vsx_xscvdpsp"
2110 [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
2111 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
2112 UNSPEC_VSX_CVSPDP))]
2113 "VECTOR_UNIT_VSX_P (DFmode)"
2115 [(set_attr "type" "fp")])
2117 (define_insn "vsx_xvcvspdp"
2118 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2119 (unspec:V2DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
2120 UNSPEC_VSX_CVSPDP))]
2121 "VECTOR_UNIT_VSX_P (V4SFmode)"
2123 [(set_attr "type" "vecdouble")])
2125 (define_insn "vsx_xvcvdpsp"
2126 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
2127 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
2128 UNSPEC_VSX_CVSPDP))]
2129 "VECTOR_UNIT_VSX_P (V2DFmode)"
2131 [(set_attr "type" "vecdouble")])
2133 ;; xscvspdp, representing the scalar SF type as V4SF
2134 (define_insn "vsx_xscvspdp"
2135 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2136 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2137 UNSPEC_VSX_CVSPDP))]
2138 "VECTOR_UNIT_VSX_P (V4SFmode)"
2140 [(set_attr "type" "fp")])
2142 ;; Same as vsx_xscvspdp, but use SF as the type
2143 (define_insn "vsx_xscvspdp_scalar2"
2144 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2145 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2146 UNSPEC_VSX_CVSPDP))]
2147 "VECTOR_UNIT_VSX_P (V4SFmode)"
2149 [(set_attr "type" "fp")])
2151 ;; Generate xvcvhpsp instruction
2152 (define_insn "vsx_xvcvhpsp"
2153 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2154 (unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2155 UNSPEC_VSX_CVHPSP))]
2158 [(set_attr "type" "vecfloat")])
2160 ;; xscvdpsp used for splatting a scalar to V4SF, knowing that the internal SF
2161 ;; format of scalars is actually DF.
2162 (define_insn "vsx_xscvdpsp_scalar"
2163 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2164 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2165 UNSPEC_VSX_CVSPDP))]
2166 "VECTOR_UNIT_VSX_P (V4SFmode)"
2168 [(set_attr "type" "fp")])
2170 ;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
2171 (define_insn "vsx_xscvdpspn"
2172 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2173 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
2174 UNSPEC_VSX_CVDPSPN))]
2177 [(set_attr "type" "fp")])
2179 (define_insn "vsx_xscvspdpn"
2180 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2181 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2182 UNSPEC_VSX_CVSPDPN))]
2185 [(set_attr "type" "fp")])
2187 (define_insn "vsx_xscvdpspn_scalar"
2188 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2189 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2190 UNSPEC_VSX_CVDPSPN))]
2193 [(set_attr "type" "fp")])
2195 ;; Used by direct move to move a SFmode value from GPR to VSX register
2196 (define_insn "vsx_xscvspdpn_directmove"
2197 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2198 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2199 UNSPEC_VSX_CVSPDPN))]
2202 [(set_attr "type" "fp")])
2204 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
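;; As a rough sketch of the intended semantics, vec_ctf (v, n) on a V2DI
;; input is expected to compute (double) v[i] * 2**-n; the expanders below
;; do the integer-to-float conversion first and then apply the power-of-two
;; scale via rs6000_scale_v2df.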
2206 (define_expand "vsx_xvcvsxddp_scale"
2207 [(match_operand:V2DF 0 "vsx_register_operand")
2208 (match_operand:V2DI 1 "vsx_register_operand")
2209 (match_operand:QI 2 "immediate_operand")]
2210 "VECTOR_UNIT_VSX_P (V2DFmode)"
2212 rtx op0 = operands[0];
2213 rtx op1 = operands[1];
2214 int scale = INTVAL (operands[2]);
2215 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2217 rs6000_scale_v2df (op0, op0, -scale);
2221 (define_insn "vsx_xvcvsxddp"
2222 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2223 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2224 UNSPEC_VSX_XVCVSXDDP))]
2225 "VECTOR_UNIT_VSX_P (V2DFmode)"
2227 [(set_attr "type" "vecdouble")])
2229 (define_expand "vsx_xvcvuxddp_scale"
2230 [(match_operand:V2DF 0 "vsx_register_operand")
2231 (match_operand:V2DI 1 "vsx_register_operand")
2232 (match_operand:QI 2 "immediate_operand")]
2233 "VECTOR_UNIT_VSX_P (V2DFmode)"
2235 rtx op0 = operands[0];
2236 rtx op1 = operands[1];
2237 int scale = INTVAL (operands[2]);
2238 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2240 rs6000_scale_v2df (op0, op0, -scale);
2244 (define_insn "vsx_xvcvuxddp"
2245 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2246 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2247 UNSPEC_VSX_XVCVUXDDP))]
2248 "VECTOR_UNIT_VSX_P (V2DFmode)"
2250 [(set_attr "type" "vecdouble")])
2252 (define_expand "vsx_xvcvdpsxds_scale"
2253 [(match_operand:V2DI 0 "vsx_register_operand")
2254 (match_operand:V2DF 1 "vsx_register_operand")
2255 (match_operand:QI 2 "immediate_operand")]
2256 "VECTOR_UNIT_VSX_P (V2DFmode)"
2258 rtx op0 = operands[0];
2259 rtx op1 = operands[1];
2261 int scale = INTVAL (operands[2]);
2266 tmp = gen_reg_rtx (V2DFmode);
2267 rs6000_scale_v2df (tmp, op1, scale);
2269 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2273 ;; convert vector of 64-bit floating point numbers to vector of
2274 ;; 64-bit signed integer
2275 (define_insn "vsx_xvcvdpsxds"
2276 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2277 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2278 UNSPEC_VSX_XVCVDPSXDS))]
2279 "VECTOR_UNIT_VSX_P (V2DFmode)"
2280 "xvcvdpsxds %x0,%x1"
2281 [(set_attr "type" "vecdouble")])
2283 ;; convert vector of 32-bit floating point numbers to vector of
2284 ;; 32-bit signed integer
2285 (define_insn "vsx_xvcvspsxws"
2286 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2287 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2288 UNSPEC_VSX_XVCVSPSXWS))]
2289 "VECTOR_UNIT_VSX_P (V4SFmode)"
2290 "xvcvspsxws %x0,%x1"
2291 [(set_attr "type" "vecfloat")])
2293 ;; convert vector of 64-bit floating point numbers to vector of
2294 ;; 64-bit unsigned integer
2295 (define_expand "vsx_xvcvdpuxds_scale"
2296 [(match_operand:V2DI 0 "vsx_register_operand")
2297 (match_operand:V2DF 1 "vsx_register_operand")
2298 (match_operand:QI 2 "immediate_operand")]
2299 "VECTOR_UNIT_VSX_P (V2DFmode)"
2301 rtx op0 = operands[0];
2302 rtx op1 = operands[1];
2304 int scale = INTVAL (operands[2]);
2309 tmp = gen_reg_rtx (V2DFmode);
2310 rs6000_scale_v2df (tmp, op1, scale);
2312 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2316 ;; convert vector of 32-bit floating point numbers to vector of
2317 ;; 32-bit unsigned integer
2318 (define_insn "vsx_xvcvspuxws"
2319 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2320 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2321 UNSPEC_VSX_XVCVSPSXWS))]
2322 "VECTOR_UNIT_VSX_P (V4SFmode)"
2323 "xvcvspuxws %x0,%x1"
2324 [(set_attr "type" "vecfloat")])
2326 (define_insn "vsx_xvcvdpuxds"
2327 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2328 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2329 UNSPEC_VSX_XVCVDPUXDS))]
2330 "VECTOR_UNIT_VSX_P (V2DFmode)"
2331 "xvcvdpuxds %x0,%x1"
2332 [(set_attr "type" "vecdouble")])
2334 ;; Convert from 64-bit to 32-bit types
2335 ;; Note, favor the Altivec registers since the usual use of these instructions
2336 ;; is in vector converts and we need to use the Altivec vperm instruction.
2338 (define_insn "vsx_xvcvdpsxws"
2339 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2340 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2341 UNSPEC_VSX_CVDPSXWS))]
2342 "VECTOR_UNIT_VSX_P (V2DFmode)"
2343 "xvcvdpsxws %x0,%x1"
2344 [(set_attr "type" "vecdouble")])
2346 (define_insn "vsx_xvcvdpuxws"
2347 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2348 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2349 UNSPEC_VSX_CVDPUXWS))]
2350 "VECTOR_UNIT_VSX_P (V2DFmode)"
2351 "xvcvdpuxws %x0,%x1"
2352 [(set_attr "type" "vecdouble")])
2354 (define_insn "vsx_xvcvsxdsp"
2355 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2356 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2357 UNSPEC_VSX_CVSXDSP))]
2358 "VECTOR_UNIT_VSX_P (V2DFmode)"
2360 [(set_attr "type" "vecfloat")])
2362 (define_insn "vsx_xvcvuxdsp"
2363 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2364 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2365 UNSPEC_VSX_CVUXDSP))]
2366 "VECTOR_UNIT_VSX_P (V2DFmode)"
2368 [(set_attr "type" "vecdouble")])
2370 (define_insn "vsx_xvcdpsp"
2371 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2372 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2373 UNSPEC_VSX_XVCDPSP))]
2374 "VECTOR_UNIT_VSX_P (V2DFmode)"
2376 [(set_attr "type" "vecdouble")])
2378 ;; Convert from 32-bit to 64-bit types
2379 ;; Provide both vector and scalar targets
2380 (define_insn "vsx_xvcvsxwdp"
2381 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2382 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2383 UNSPEC_VSX_CVSXWDP))]
2384 "VECTOR_UNIT_VSX_P (V2DFmode)"
2386 [(set_attr "type" "vecdouble")])
2388 (define_insn "vsx_xvcvsxwdp_df"
2389 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2390 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2391 UNSPEC_VSX_CVSXWDP))]
2394 [(set_attr "type" "vecdouble")])
2396 (define_insn "vsx_xvcvuxwdp"
2397 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2398 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2399 UNSPEC_VSX_CVUXWDP))]
2400 "VECTOR_UNIT_VSX_P (V2DFmode)"
2402 [(set_attr "type" "vecdouble")])
2404 (define_insn "vsx_xvcvuxwdp_df"
2405 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2406 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2407 UNSPEC_VSX_CVUXWDP))]
2410 [(set_attr "type" "vecdouble")])
2412 (define_insn "vsx_xvcvspsxds"
2413 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2414 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
2415 UNSPEC_VSX_CVSPSXDS))]
2416 "VECTOR_UNIT_VSX_P (V2DFmode)"
2417 "xvcvspsxds %x0,%x1"
2418 [(set_attr "type" "vecdouble")])
2420 (define_insn "vsx_xvcvspuxds"
2421 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2422 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
2423 UNSPEC_VSX_CVSPUXDS))]
2424 "VECTOR_UNIT_VSX_P (V2DFmode)"
2425 "xvcvspuxds %x0,%x1"
2426 [(set_attr "type" "vecdouble")])
2428 (define_insn "vsx_xvcvsxwsp"
2429 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2430 (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2431 UNSPEC_VSX_CVSXWSP))]
2432 "VECTOR_UNIT_VSX_P (V4SFmode)"
2434 [(set_attr "type" "vecfloat")])
2436 (define_insn "vsx_xvcvuxwsp"
2437 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2438 (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2439 UNSPEC_VSX_CVUXWSP))]
2440 "VECTOR_UNIT_VSX_P (V4SFmode)"
2442 [(set_attr "type" "vecfloat")])
2444 ;; Generate float2_v2df
2445 ;; convert two vectors of double to a vector of float
2446 (define_expand "float2_v2df"
2447 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2448 (use (match_operand:V2DF 1 "register_operand" "wa"))
2449 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2450 "VECTOR_UNIT_VSX_P (V4SFmode)"
2452 rtx rtx_src1, rtx_src2, rtx_dst;
2454 rtx_dst = operands[0];
2455 rtx_src1 = operands[1];
2456 rtx_src2 = operands[2];
2458 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2463 ;; convert two long long signed ints to float
2464 (define_expand "float2_v2di"
2465 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2466 (use (match_operand:V2DI 1 "register_operand" "wa"))
2467 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2468 "VECTOR_UNIT_VSX_P (V4SFmode)"
2470 rtx rtx_src1, rtx_src2, rtx_dst;
2472 rtx_dst = operands[0];
2473 rtx_src1 = operands[1];
2474 rtx_src2 = operands[2];
2476 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2480 ;; Generate uns_float2
2481 ;; convert two long long unsigned ints to float
2482 (define_expand "uns_float2_v2di"
2483 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2484 (use (match_operand:V2DI 1 "register_operand" "wa"))
2485 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2486 "VECTOR_UNIT_VSX_P (V4SFmode)"
2488 rtx rtx_src1, rtx_src2, rtx_dst;
2490 rtx_dst = operands[0];
2491 rtx_src1 = operands[1];
2492 rtx_src2 = operands[2];
2494 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2499 ;; convert double or long long signed to float
2500 ;; (Only even words are valid, BE numbering)
2501 (define_expand "floate<mode>"
2502 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2503 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2504 "VECTOR_UNIT_VSX_P (V4SFmode)"
2506 if (BYTES_BIG_ENDIAN)
2508 /* Shift left one word to put the even word in the correct location.  */
2510 rtx rtx_val = GEN_INT (4);
2512 rtx_tmp = gen_reg_rtx (V4SFmode);
2513 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2514 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2515 rtx_tmp, rtx_tmp, rtx_val));
2518 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2523 ;; Generate uns_floate
2524 ;; convert long long unsigned to float
2525 ;; (Only even words are valid, BE numbering)
2526 (define_expand "unsfloatev2di"
2527 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2528 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2529 "VECTOR_UNIT_VSX_P (V4SFmode)"
2531 if (BYTES_BIG_ENDIAN)
2533 /* Shift left one word to put the even word in the correct location.  */
2535 rtx rtx_val = GEN_INT (4);
2537 rtx_tmp = gen_reg_rtx (V4SFmode);
2538 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2539 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2540 rtx_tmp, rtx_tmp, rtx_val));
2543 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2549 ;; convert double or long long signed to float
2550 ;; (Only odd words are valid, BE numbering)
2551 (define_expand "floato<mode>"
2552 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2553 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2554 "VECTOR_UNIT_VSX_P (V4SFmode)"
2556 if (BYTES_BIG_ENDIAN)
2557 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2560 /* Shift left one word to put the odd word in the correct location.  */
2562 rtx rtx_val = GEN_INT (4);
2564 rtx_tmp = gen_reg_rtx (V4SFmode);
2565 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2566 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2567 rtx_tmp, rtx_tmp, rtx_val));
2572 ;; Generate uns_floato
2573 ;; convert long long unsigned to float
2574 ;; (Only odd words are valid, BE numbering)
2575 (define_expand "unsfloatov2di"
2576 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2577 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2578 "VECTOR_UNIT_VSX_P (V4SFmode)"
2580 if (BYTES_BIG_ENDIAN)
2581 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2584 /* Shift left one word to put the odd word in the correct location.  */
2586 rtx rtx_val = GEN_INT (4);
2588 rtx_tmp = gen_reg_rtx (V4SFmode);
2589 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2590 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2591 rtx_tmp, rtx_tmp, rtx_val));
2596 ;; Generate vsigned2
2597 ;; convert two double float vectors to a vector of single precision signed ints
2598 (define_expand "vsigned2_v2df"
2599 [(match_operand:V4SI 0 "register_operand" "=wa")
2600 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2601 (match_operand:V2DF 2 "register_operand" "wa")]
2602 UNSPEC_VSX_VSIGNED2)]
2605 rtx rtx_src1, rtx_src2, rtx_dst;
2606 bool signed_convert = true;
2608 rtx_dst = operands[0];
2609 rtx_src1 = operands[1];
2610 rtx_src2 = operands[2];
2612 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2616 ;; Generate vsignedo_v2df
2617 ;; signed double float to int conversion, odd words
2618 (define_expand "vsignedo_v2df"
2619 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2620 (match_operand:V2DF 1 "register_operand" "wa"))]
2623 if (BYTES_BIG_ENDIAN)
2626 rtx rtx_val = GEN_INT (12);
2627 rtx_tmp = gen_reg_rtx (V4SImode);
2629 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2631 /* Big endian word numbering for words in operand is 0 1 2 3.
2632 Take (operand[1] operand[1]) and shift left one word:
2633 0 1 2 3 0 1 2 3 => 1 2 3 0.
2634 Words 1 and 3 are now where they need to be for the result.  */
2636 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2640 /* Little endian word numbering for operand is 3 2 1 0.
2641 Result words 3 and 1 are where they need to be. */
2642 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2646 [(set_attr "type" "veccomplex")])
2648 ;; Generate vsignede_v2df
2649 ;; signed double float to int conversion, even words
2650 (define_expand "vsignede_v2df"
2651 [(set (match_operand:V4SI 0 "register_operand" "=v")
2652 (match_operand:V2DF 1 "register_operand" "v"))]
2655 if (BYTES_BIG_ENDIAN)
2656 /* Big endian word numbering for words in operand is 0 1 2 3.
2657 Words 0 and 2 are already where they need to be for the result.  */
2658 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2663 rtx rtx_val = GEN_INT (12);
2664 rtx_tmp = gen_reg_rtx (V4SImode);
2666 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2668 /* Little endian word numbering for operand is 3 2 1 0.
2669 take (operand[1] operand[1]) and shift left three words
2670 0 1 2 3 0 1 2 3 => 3 0 1 2
2671 Words 0 and 2 are now where they need to be for the result. */
2672 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2677 [(set_attr "type" "veccomplex")])
2679 ;; Generate vunsigned2
2680 ;; convert two double float vectors to a vector of single precision unsigned ints
2682 (define_expand "vunsigned2_v2df"
2683 [(match_operand:V4SI 0 "register_operand" "=v")
2684 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2685 (match_operand:V2DF 2 "register_operand" "v")]
2686 UNSPEC_VSX_VSIGNED2)]
2689 rtx rtx_src1, rtx_src2, rtx_dst;
2690 bool signed_convert = false;
2692 rtx_dst = operands[0];
2693 rtx_src1 = operands[1];
2694 rtx_src2 = operands[2];
2696 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2700 ;; Generate vunsignedo_v2df
2701 ;; unsigned double float to int conversion, odd words
2702 (define_expand "vunsignedo_v2df"
2703 [(set (match_operand:V4SI 0 "register_operand" "=v")
2704 (match_operand:V2DF 1 "register_operand" "v"))]
2707 if (BYTES_BIG_ENDIAN)
2710 rtx rtx_val = GEN_INT (12);
2711 rtx_tmp = gen_reg_rtx (V4SImode);
2713 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2715 /* Big endian word numbering for words in operand is 0 1 2 3.
2716 Take (operand[1] operand[1]) and shift left one word:
2717 0 1 2 3 0 1 2 3 => 1 2 3 0.
2718 Words 1 and 3 are now where they need to be for the result.  */
2720 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2724 /* Little endian word numbering for operand is 3 2 1 0.
2725 Result words 3 and 1 are where they need to be. */
2726 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2730 [(set_attr "type" "veccomplex")])
2732 ;; Generate vunsignede_v2df
2733 ;; unsigned double float to int conversion, even words
2734 (define_expand "vunsignede_v2df"
2735 [(set (match_operand:V4SI 0 "register_operand" "=v")
2736 (match_operand:V2DF 1 "register_operand" "v"))]
2739 if (BYTES_BIG_ENDIAN)
2740 /* Big endian word numbering for words in operand is 0 1 2 3.
2741 Words 0 and 2 are already where they need to be for the result.  */
2742 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2747 rtx rtx_val = GEN_INT (12);
2748 rtx_tmp = gen_reg_rtx (V4SImode);
2750 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2752 /* Little endian word numbering for operand is 3 2 1 0.
2753 take (operand[1] operand[1]) and shift left three words
2754 0 1 2 3 0 1 2 3 => 3 0 1 2
2755 Words 0 and 2 are now where they need to be for the result. */
2756 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2761 [(set_attr "type" "veccomplex")])
2763 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2764 ;; the xvrdpiz instruction does not truncate the value if the floating
2765 ;; point value is < LONG_MIN or > LONG_MAX.
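;; For example, with x = 1.0e30 xvrdpiz still yields 1.0e30, whereas the
;; explicit fix/float sequence would have gone through an out-of-range
;; 64-bit integer, so the fold is only valid when fast-math lets us assume
;; such values do not occur.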
2766 (define_insn "*vsx_float_fix_v2df2"
2767 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
2770 (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
2772 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2773 && !flag_trapping_math && TARGET_FRIZ"
2775 [(set_attr "type" "vecdouble")])
2778 ;; Permute operations
2780 ;; Build a V2DF/V2DI vector from two scalars
2781 (define_insn "vsx_concat_<mode>"
2782 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2784 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2785 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2786 "VECTOR_MEM_VSX_P (<MODE>mode)"
2788 if (which_alternative == 0)
2789 return (BYTES_BIG_ENDIAN
2790 ? "xxpermdi %x0,%x1,%x2,0"
2791 : "xxpermdi %x0,%x2,%x1,0");
2793 else if (which_alternative == 1)
2794 return (BYTES_BIG_ENDIAN
2795 ? "mtvsrdd %x0,%1,%2"
2796 : "mtvsrdd %x0,%2,%1");
2801 [(set_attr "type" "vecperm")])
2803 ;; Combiner patterns to allow creating XXPERMDIs to access either doubleword
2804 ;; element in a vector register.
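;; As a reading aid: the 2-bit xxpermdi immediate is believed to select the
;; doublewords, the high bit choosing the doubleword of the first input and
;; the low bit that of the second, which is why the patterns below build the
;; immediate as 2*<first index> + <second index> (mirrored for little endian).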
2805 (define_insn "*vsx_concat_<mode>_1"
2806 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2808 (vec_select:<VS_scalar>
2809 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2810 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2811 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2812 "VECTOR_MEM_VSX_P (<MODE>mode)"
2814 HOST_WIDE_INT dword = INTVAL (operands[2]);
2815 if (BYTES_BIG_ENDIAN)
2817 operands[4] = GEN_INT (2*dword);
2818 return "xxpermdi %x0,%x1,%x3,%4";
2822 operands[4] = GEN_INT (!dword);
2823 return "xxpermdi %x0,%x3,%x1,%4";
2826 [(set_attr "type" "vecperm")])
2828 (define_insn "*vsx_concat_<mode>_2"
2829 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2831 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2832 (vec_select:<VS_scalar>
2833 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2834 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2835 "VECTOR_MEM_VSX_P (<MODE>mode)"
2837 HOST_WIDE_INT dword = INTVAL (operands[3]);
2838 if (BYTES_BIG_ENDIAN)
2840 operands[4] = GEN_INT (dword);
2841 return "xxpermdi %x0,%x1,%x2,%4";
2845 operands[4] = GEN_INT (2 * !dword);
2846 return "xxpermdi %x0,%x2,%x1,%4";
2849 [(set_attr "type" "vecperm")])
2851 (define_insn "*vsx_concat_<mode>_3"
2852 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2854 (vec_select:<VS_scalar>
2855 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2856 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2857 (vec_select:<VS_scalar>
2858 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2859 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2860 "VECTOR_MEM_VSX_P (<MODE>mode)"
2862 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2863 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2864 if (BYTES_BIG_ENDIAN)
2866 operands[5] = GEN_INT ((2 * dword1) + dword2);
2867 return "xxpermdi %x0,%x1,%x3,%5";
2871 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2872 return "xxpermdi %x0,%x3,%x1,%5";
2875 [(set_attr "type" "vecperm")])
2877 ;; Special purpose concat using xxpermdi to glue two single precision values
2878 ;; together, relying on the fact that internally scalar floats are represented
2879 ;; as doubles. This is used to initialize a V4SF vector with 4 floats
2880 (define_insn "vsx_concat_v2sf"
2881 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2883 [(match_operand:SF 1 "vsx_register_operand" "wa")
2884 (match_operand:SF 2 "vsx_register_operand" "wa")]
2885 UNSPEC_VSX_CONCAT))]
2886 "VECTOR_MEM_VSX_P (V2DFmode)"
2888 if (BYTES_BIG_ENDIAN)
2889 return "xxpermdi %x0,%x1,%x2,0";
2891 return "xxpermdi %x0,%x2,%x1,0";
2893 [(set_attr "type" "vecperm")])
2895 ;; Concatenate 4 SImode elements into a V4SImode reg.
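;; The expander below builds the vector from GPRs: each SImode input is
;; zero-extended to DImode, pairs are merged into 64-bit halves with a shift
;; and an ior, and the two halves are then glued together with
;; vsx_concat_v2di.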
2896 (define_expand "vsx_init_v4si"
2897 [(use (match_operand:V4SI 0 "gpc_reg_operand"))
2898 (use (match_operand:SI 1 "gpc_reg_operand"))
2899 (use (match_operand:SI 2 "gpc_reg_operand"))
2900 (use (match_operand:SI 3 "gpc_reg_operand"))
2901 (use (match_operand:SI 4 "gpc_reg_operand"))]
2902 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2904 rtx a = gen_reg_rtx (DImode);
2905 rtx b = gen_reg_rtx (DImode);
2906 rtx c = gen_reg_rtx (DImode);
2907 rtx d = gen_reg_rtx (DImode);
2908 emit_insn (gen_zero_extendsidi2 (a, operands[1]));
2909 emit_insn (gen_zero_extendsidi2 (b, operands[2]));
2910 emit_insn (gen_zero_extendsidi2 (c, operands[3]));
2911 emit_insn (gen_zero_extendsidi2 (d, operands[4]));
2912 if (!BYTES_BIG_ENDIAN)
2918 rtx aa = gen_reg_rtx (DImode);
2919 rtx ab = gen_reg_rtx (DImode);
2920 rtx cc = gen_reg_rtx (DImode);
2921 rtx cd = gen_reg_rtx (DImode);
2922 emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
2923 emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
2924 emit_insn (gen_iordi3 (ab, aa, b));
2925 emit_insn (gen_iordi3 (cd, cc, d));
2927 rtx abcd = gen_reg_rtx (V2DImode);
2928 emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
2929 emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
2933 ;; xxpermdi for little endian loads and stores. We need several of
2934 ;; these since the form of the PARALLEL differs by mode.
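;; In every case the fix-up is the same doubleword swap,
;; "xxpermdi %x0,%x1,%x1,2" (the register permuted with itself); only the
;; PARALLEL spelling of the lane order differs between the element modes.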
2935 (define_insn "*vsx_xxpermdi2_le_<mode>"
2936 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2938 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
2939 (parallel [(const_int 1) (const_int 0)])))]
2940 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2941 "xxpermdi %x0,%x1,%x1,2"
2942 [(set_attr "type" "vecperm")])
2944 (define_insn "*vsx_xxpermdi4_le_<mode>"
2945 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
2947 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
2948 (parallel [(const_int 2) (const_int 3)
2949 (const_int 0) (const_int 1)])))]
2950 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2951 "xxpermdi %x0,%x1,%x1,2"
2952 [(set_attr "type" "vecperm")])
2954 (define_insn "*vsx_xxpermdi8_le_V8HI"
2955 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2957 (match_operand:V8HI 1 "vsx_register_operand" "wa")
2958 (parallel [(const_int 4) (const_int 5)
2959 (const_int 6) (const_int 7)
2960 (const_int 0) (const_int 1)
2961 (const_int 2) (const_int 3)])))]
2962 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
2963 "xxpermdi %x0,%x1,%x1,2"
2964 [(set_attr "type" "vecperm")])
2966 (define_insn "*vsx_xxpermdi16_le_V16QI"
2967 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2969 (match_operand:V16QI 1 "vsx_register_operand" "wa")
2970 (parallel [(const_int 8) (const_int 9)
2971 (const_int 10) (const_int 11)
2972 (const_int 12) (const_int 13)
2973 (const_int 14) (const_int 15)
2974 (const_int 0) (const_int 1)
2975 (const_int 2) (const_int 3)
2976 (const_int 4) (const_int 5)
2977 (const_int 6) (const_int 7)])))]
2978 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
2979 "xxpermdi %x0,%x1,%x1,2"
2980 [(set_attr "type" "vecperm")])
2982 ;; lxvd2x for little endian loads. We need several of
2983 ;; these since the form of the PARALLEL differs by mode.
2984 (define_insn "*vsx_lxvd2x2_le_<mode>"
2985 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2987 (match_operand:VSX_D 1 "memory_operand" "Z")
2988 (parallel [(const_int 1) (const_int 0)])))]
2989 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2991 [(set_attr "type" "vecload")])
2993 (define_insn "*vsx_lxvd2x4_le_<mode>"
2994 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
2996 (match_operand:VSX_W 1 "memory_operand" "Z")
2997 (parallel [(const_int 2) (const_int 3)
2998 (const_int 0) (const_int 1)])))]
2999 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3001 [(set_attr "type" "vecload")])
3003 (define_insn "*vsx_lxvd2x8_le_V8HI"
3004 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3006 (match_operand:V8HI 1 "memory_operand" "Z")
3007 (parallel [(const_int 4) (const_int 5)
3008 (const_int 6) (const_int 7)
3009 (const_int 0) (const_int 1)
3010 (const_int 2) (const_int 3)])))]
3011 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3013 [(set_attr "type" "vecload")])
3015 (define_insn "*vsx_lxvd2x16_le_V16QI"
3016 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3018 (match_operand:V16QI 1 "memory_operand" "Z")
3019 (parallel [(const_int 8) (const_int 9)
3020 (const_int 10) (const_int 11)
3021 (const_int 12) (const_int 13)
3022 (const_int 14) (const_int 15)
3023 (const_int 0) (const_int 1)
3024 (const_int 2) (const_int 3)
3025 (const_int 4) (const_int 5)
3026 (const_int 6) (const_int 7)])))]
3027 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3029 [(set_attr "type" "vecload")])
3031 ;; stxvd2x for little endian stores. We need several of
3032 ;; these since the form of the PARALLEL differs by mode.
3033 (define_insn "*vsx_stxvd2x2_le_<mode>"
3034 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3036 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3037 (parallel [(const_int 1) (const_int 0)])))]
3038 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3040 [(set_attr "type" "vecstore")])
3042 (define_insn "*vsx_stxvd2x4_le_<mode>"
3043 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3045 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3046 (parallel [(const_int 2) (const_int 3)
3047 (const_int 0) (const_int 1)])))]
3048 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3050 [(set_attr "type" "vecstore")])
3052 (define_insn "*vsx_stxvd2x8_le_V8HI"
3053 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3055 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3056 (parallel [(const_int 4) (const_int 5)
3057 (const_int 6) (const_int 7)
3058 (const_int 0) (const_int 1)
3059 (const_int 2) (const_int 3)])))]
3060 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3062 [(set_attr "type" "vecstore")])
3064 (define_insn "*vsx_stxvd2x16_le_V16QI"
3065 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3067 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3068 (parallel [(const_int 8) (const_int 9)
3069 (const_int 10) (const_int 11)
3070 (const_int 12) (const_int 13)
3071 (const_int 14) (const_int 15)
3072 (const_int 0) (const_int 1)
3073 (const_int 2) (const_int 3)
3074 (const_int 4) (const_int 5)
3075 (const_int 6) (const_int 7)])))]
3076 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3078 [(set_attr "type" "vecstore")])
3080 ;; Convert a TImode value into V1TImode
3081 (define_expand "vsx_set_v1ti"
3082 [(match_operand:V1TI 0 "nonimmediate_operand")
3083 (match_operand:V1TI 1 "nonimmediate_operand")
3084 (match_operand:TI 2 "input_operand")
3085 (match_operand:QI 3 "u5bit_cint_operand")]
3086 "VECTOR_MEM_VSX_P (V1TImode)"
3088 if (operands[3] != const0_rtx)
3091 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
3095 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
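;; For example, setting element 0 becomes: extract element 1 of the old
;; vector, then vec_concat the new scalar with that value, so the update is
;; expressed with the existing vsx_extract_<mode> and vsx_concat_<mode>
;; patterns rather than a separate insert pattern.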
3096 (define_expand "vsx_set_<mode>"
3097 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3098 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3099 (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3100 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3101 "VECTOR_MEM_VSX_P (<MODE>mode)"
3103 rtx dest = operands[0];
3104 rtx vec_reg = operands[1];
3105 rtx value = operands[2];
3106 rtx ele = operands[3];
3107 rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3109 if (ele == const0_rtx)
3111 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3112 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3115 else if (ele == const1_rtx)
3117 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3118 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3125 ;; Extract a DF/DI element from V2DF/V2DI
3126 ;; Optimize cases where we can do a simple or direct move.
3127 ;; Or see if we can avoid doing the move at all.
3129 ;; There are some unresolved problems with reload that show up if an Altivec
3130 ;; register was picked. Limit the scalar value to FPRs for now.
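;; As a sketch of the cases handled below: when the selected doubleword is
;; already in the scalar position and source and destination are the same
;; register, no code is emitted (just an assembler comment); otherwise a
;; register copy or a direct move to a GPR (mfvsrd/mfvsrld) is used, and the
;; remaining case rotates the wanted doubleword into place with xxpermdi.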
3132 (define_insn "vsx_extract_<mode>"
3133 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
3134 (vec_select:<VS_scalar>
3135 (match_operand:VSX_D 1 "gpc_reg_operand" "wa, wa, wa, wa")
3137 [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
3138 "VECTOR_MEM_VSX_P (<MODE>mode)"
3140 int element = INTVAL (operands[2]);
3141 int op0_regno = REGNO (operands[0]);
3142 int op1_regno = REGNO (operands[1]);
3145 gcc_assert (IN_RANGE (element, 0, 1));
3146 gcc_assert (VSX_REGNO_P (op1_regno));
3148 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3150 if (op0_regno == op1_regno)
3151 return ASM_COMMENT_START " vec_extract to same register";
3153 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3154 && TARGET_POWERPC64)
3155 return "mfvsrd %0,%x1";
3157 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3160 else if (VSX_REGNO_P (op0_regno))
3161 return "xxlor %x0,%x1,%x1";
3167 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3168 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3169 return "mfvsrld %0,%x1";
3171 else if (VSX_REGNO_P (op0_regno))
3173 fldDM = element << 1;
3174 if (!BYTES_BIG_ENDIAN)
3176 operands[3] = GEN_INT (fldDM);
3177 return "xxpermdi %x0,%x1,%x1,%3";
3183 [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")
3184 (set_attr "isa" "*,*,p8v,p9v")])
3186 ;; Optimize extracting a single scalar element from memory.
3187 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3188 [(set (match_operand:<VS_scalar> 0 "register_operand" "=wa,wr")
3189 (vec_select:<VSX_D:VS_scalar>
3190 (match_operand:VSX_D 1 "memory_operand" "m,m")
3191 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3192 (clobber (match_scratch:P 3 "=&b,&b"))]
3193 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3195 "&& reload_completed"
3196 [(set (match_dup 0) (match_dup 4))]
3198 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3199 operands[3], <VSX_D:VS_scalar>mode);
3201 [(set_attr "type" "fpload,load")
3202 (set_attr "length" "8")])
3204 ;; Optimize storing a single scalar element that is already in the right position for a store to memory.
3206 (define_insn "*vsx_extract_<mode>_store"
3207 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3208 (vec_select:<VS_scalar>
3209 (match_operand:VSX_D 1 "register_operand" "d,v,v")
3210 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3211 "VECTOR_MEM_VSX_P (<MODE>mode)"
3216 [(set_attr "type" "fpstore")
3217 (set_attr "isa" "*,p7v,p9v")])
3219 ;; Variable V2DI/V2DF extract shift
3220 (define_insn "vsx_vslo_<mode>"
3221 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3222 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3223 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3225 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3227 [(set_attr "type" "vecperm")])
3229 ;; Variable V2DI/V2DF extract
3230 (define_insn_and_split "vsx_extract_<mode>_var"
3231 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,wa,r")
3232 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
3233 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3234 UNSPEC_VSX_EXTRACT))
3235 (clobber (match_scratch:DI 3 "=r,&b,&b"))
3236 (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3237 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3239 "&& reload_completed"
3242 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3243 operands[3], operands[4]);
3247 ;; Extract a SF element from V4SF
3248 (define_insn_and_split "vsx_extract_v4sf"
3249 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
3251 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3252 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3253 (clobber (match_scratch:V4SF 3 "=0"))]
3254 "VECTOR_UNIT_VSX_P (V4SFmode)"
3259 rtx op0 = operands[0];
3260 rtx op1 = operands[1];
3261 rtx op2 = operands[2];
3262 rtx op3 = operands[3];
3264 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3270 if (GET_CODE (op3) == SCRATCH)
3271 op3 = gen_reg_rtx (V4SFmode);
3272 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3275 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3278 [(set_attr "length" "8")
3279 (set_attr "type" "fp")])
3281 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3282 [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
3284 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3285 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3286 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3287 "VECTOR_MEM_VSX_P (V4SFmode)"
3289 "&& reload_completed"
3290 [(set (match_dup 0) (match_dup 4))]
3292 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3293 operands[3], SFmode);
3295 [(set_attr "type" "fpload,fpload,fpload,load")
3296 (set_attr "length" "8")
3297 (set_attr "isa" "*,p7v,p9v,*")])
3299 ;; Variable V4SF extract
3300 (define_insn_and_split "vsx_extract_v4sf_var"
3301 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,wa,?r")
3302 (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
3303 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3304 UNSPEC_VSX_EXTRACT))
3305 (clobber (match_scratch:DI 3 "=r,&b,&b"))
3306 (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3307 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3309 "&& reload_completed"
3312 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3313 operands[3], operands[4]);
3317 ;; Expand the builtin form of xxpermdi to canonical rtl.
3318 (define_expand "vsx_xxpermdi_<mode>"
3319 [(match_operand:VSX_L 0 "vsx_register_operand")
3320 (match_operand:VSX_L 1 "vsx_register_operand")
3321 (match_operand:VSX_L 2 "vsx_register_operand")
3322 (match_operand:QI 3 "u5bit_cint_operand")]
3323 "VECTOR_MEM_VSX_P (<MODE>mode)"
3325 rtx target = operands[0];
3326 rtx op0 = operands[1];
3327 rtx op1 = operands[2];
3328 int mask = INTVAL (operands[3]);
3329 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3330 rtx perm1 = GEN_INT ((mask & 1) + 2);
3331 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3333 if (<MODE>mode == V2DFmode)
3334 gen = gen_vsx_xxpermdi2_v2df_1;
3337 gen = gen_vsx_xxpermdi2_v2di_1;
3338 if (<MODE>mode != V2DImode)
3340 target = gen_lowpart (V2DImode, target);
3341 op0 = gen_lowpart (V2DImode, op0);
3342 op1 = gen_lowpart (V2DImode, op1);
3345 emit_insn (gen (target, op0, op1, perm0, perm1));
3349 ;; Special version of xxpermdi that retains big-endian semantics.
3350 (define_expand "vsx_xxpermdi_<mode>_be"
3351 [(match_operand:VSX_L 0 "vsx_register_operand")
3352 (match_operand:VSX_L 1 "vsx_register_operand")
3353 (match_operand:VSX_L 2 "vsx_register_operand")
3354 (match_operand:QI 3 "u5bit_cint_operand")]
3355 "VECTOR_MEM_VSX_P (<MODE>mode)"
3357 rtx target = operands[0];
3358 rtx op0 = operands[1];
3359 rtx op1 = operands[2];
3360 int mask = INTVAL (operands[3]);
3361 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3362 rtx perm1 = GEN_INT ((mask & 1) + 2);
3363 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3365 if (<MODE>mode == V2DFmode)
3366 gen = gen_vsx_xxpermdi2_v2df_1;
3369 gen = gen_vsx_xxpermdi2_v2di_1;
3370 if (<MODE>mode != V2DImode)
3372 target = gen_lowpart (V2DImode, target);
3373 op0 = gen_lowpart (V2DImode, op0);
3374 op1 = gen_lowpart (V2DImode, op1);
3377 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3378 transformation we don't want; it is necessary for
3379 rs6000_expand_vec_perm_const_1 but not for this use. So we
3380 prepare for that by reversing the transformation here. */
3381 if (BYTES_BIG_ENDIAN)
3382 emit_insn (gen (target, op0, op1, perm0, perm1));
3385 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3386 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3387 emit_insn (gen (target, op1, op0, p0, p1));
3392 (define_insn "vsx_xxpermdi2_<mode>_1"
3393 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3395 (vec_concat:<VS_double>
3396 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3397 (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
3398 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3399 (match_operand 4 "const_2_to_3_operand" "")])))]
3400 "VECTOR_MEM_VSX_P (<MODE>mode)"
3404 /* For little endian, swap operands and invert/swap selectors
3405 to get the correct xxpermdi. The operand swap sets up the
3406 inputs as a little endian array. The selectors are swapped
3407 because they are defined to use big endian ordering. The
3408 selectors are inverted to get the correct doublewords for
3409 little endian ordering. */
3410 if (BYTES_BIG_ENDIAN)
3412 op3 = INTVAL (operands[3]);
3413 op4 = INTVAL (operands[4]);
3417 op3 = 3 - INTVAL (operands[4]);
3418 op4 = 3 - INTVAL (operands[3]);
3421 mask = (op3 << 1) | (op4 - 2);
3422 operands[3] = GEN_INT (mask);
3424 if (BYTES_BIG_ENDIAN)
3425 return "xxpermdi %x0,%x1,%x2,%3";
3427 return "xxpermdi %x0,%x2,%x1,%3";
3429 [(set_attr "type" "vecperm")])
3431 ;; Extraction of a single element in a small integer vector. Until ISA 3.0,
3432 ;; none of the small types were allowed in a vector register, so we had to
3433 ;; extract to a DImode and either do a direct move or store.
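;; On ISA 3.0 the element can be extracted directly: xxextractuw and
;; vextractu{b,h} take a byte offset, so the patterns below turn the element
;; number into <element> * <element size>, adjusting the element number for
;; endianness first.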
3434 (define_expand "vsx_extract_<mode>"
3435 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3436 (vec_select:<VS_scalar>
3437 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3438 (parallel [(match_operand:QI 2 "const_int_operand")])))
3439 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3440 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3442 /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */
3443 if (TARGET_P9_VECTOR)
3445 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3451 (define_insn "vsx_extract_<mode>_p9"
3452 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3453 (vec_select:<VS_scalar>
3454 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3455 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3456 (clobber (match_scratch:SI 3 "=r,X"))]
3457 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3459 if (which_alternative == 0)
3464 HOST_WIDE_INT elt = INTVAL (operands[2]);
3465 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3466 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3469 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3470 HOST_WIDE_INT offset = unit_size * elt_adj;
3472 operands[2] = GEN_INT (offset);
3474 return "xxextractuw %x0,%x1,%2";
3476 return "vextractu<wd> %0,%1,%2";
3479 [(set_attr "type" "vecsimple")
3480 (set_attr "isa" "p9v,*")])
3483 [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3484 (vec_select:<VS_scalar>
3485 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3486 (parallel [(match_operand:QI 2 "const_int_operand")])))
3487 (clobber (match_operand:SI 3 "int_reg_operand"))]
3488 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3491 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3492 rtx op1 = operands[1];
3493 rtx op2 = operands[2];
3494 rtx op3 = operands[3];
3495 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3497 emit_move_insn (op3, GEN_INT (offset));
3498 if (BYTES_BIG_ENDIAN)
3499 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3501 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3505 ;; Optimize zero extracts to eliminate the AND after the extract.
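;; This relies on the unsigned extract instructions already zero-extending
;; the element into the destination, so the explicit zero_extend in the RTL
;; does not need a separate AND instruction.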
3506 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3507 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3509 (vec_select:<VS_scalar>
3510 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3511 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3512 (clobber (match_scratch:SI 3 "=r,X"))]
3513 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3515 "&& reload_completed"
3516 [(parallel [(set (match_dup 4)
3517 (vec_select:<VS_scalar>
3519 (parallel [(match_dup 2)])))
3520 (clobber (match_dup 3))])]
3522 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3524 [(set_attr "isa" "p9v,*")])
3526 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3527 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3528 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3529 (vec_select:<VS_scalar>
3530 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3531 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3532 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3533 (clobber (match_scratch:SI 4 "=X,&r"))]
3534 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3536 "&& reload_completed"
3537 [(parallel [(set (match_dup 3)
3538 (vec_select:<VS_scalar>
3540 (parallel [(match_dup 2)])))
3541 (clobber (match_dup 4))])
3545 (define_insn_and_split "*vsx_extract_si"
3546 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
3548 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
3549 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3550 (clobber (match_scratch:V4SI 3 "=v,v,v"))]
3551 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3553 "&& reload_completed"
3556 rtx dest = operands[0];
3557 rtx src = operands[1];
3558 rtx element = operands[2];
3559 rtx vec_tmp = operands[3];
3562 if (!BYTES_BIG_ENDIAN)
3563 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3565 /* If the value is in the correct position, we can avoid doing the VSPLT<x> instruction.  */
3567 value = INTVAL (element);
3569 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3573 if (MEM_P (operands[0]))
3575 if (can_create_pseudo_p ())
3576 dest = rs6000_force_indexed_or_indirect_mem (dest);
3578 if (TARGET_P8_VECTOR)
3579 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3581 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3584 else if (TARGET_P8_VECTOR)
3585 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3587 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3588 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3592 [(set_attr "type" "mftgpr,vecperm,fpstore")
3593 (set_attr "length" "8")
3594 (set_attr "isa" "*,p8v,*")])
3596 (define_insn_and_split "*vsx_extract_<mode>_p8"
3597 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3598 (vec_select:<VS_scalar>
3599 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3600 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3601 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3602 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3603 && !TARGET_P9_VECTOR"
3605 "&& reload_completed"
3608 rtx dest = operands[0];
3609 rtx src = operands[1];
3610 rtx element = operands[2];
3611 rtx vec_tmp = operands[3];
3614 if (!BYTES_BIG_ENDIAN)
3615 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3617 /* If the value is in the correct position, we can avoid doing the VSPLT<x> instruction.  */
3619 value = INTVAL (element);
3620 if (<MODE>mode == V16QImode)
3623 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3627 else if (<MODE>mode == V8HImode)
3630 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3637 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3638 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3641 [(set_attr "type" "mftgpr")])
3643 ;; Optimize extracting a single scalar element from memory.
3644 (define_insn_and_split "*vsx_extract_<mode>_load"
3645 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3646 (vec_select:<VS_scalar>
3647 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3648 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3649 (clobber (match_scratch:DI 3 "=&b"))]
3650 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3652 "&& reload_completed"
3653 [(set (match_dup 0) (match_dup 4))]
3655 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3656 operands[3], <VS_scalar>mode);
3658 [(set_attr "type" "load")
3659 (set_attr "length" "8")])
3661 ;; Variable V16QI/V8HI/V4SI extract
3662 (define_insn_and_split "vsx_extract_<mode>_var"
3663 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3665 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "v,v,m")
3666 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3667 UNSPEC_VSX_EXTRACT))
3668 (clobber (match_scratch:DI 3 "=r,r,&b"))
3669 (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3670 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3672 "&& reload_completed"
3675 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3676 operands[3], operands[4]);
3679 [(set_attr "isa" "p9v,*,*")])
3681 (define_insn_and_split "*vsx_extract_<mode>_<VS_scalar>mode_var"
3682 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3683 (zero_extend:<VS_scalar>
3684 (unspec:<VSX_EXTRACT_I:VS_scalar>
3685 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "v,v,m")
3686 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3687 UNSPEC_VSX_EXTRACT)))
3688 (clobber (match_scratch:DI 3 "=r,r,&b"))
3689 (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3690 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3692 "&& reload_completed"
3695 machine_mode smode = <VS_scalar>mode;
3696 rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
3697 operands[1], operands[2],
3698 operands[3], operands[4]);
3701 [(set_attr "isa" "p9v,*,*")])
3703 ;; VSX_EXTRACT optimizations
3704 ;; Optimize double d = (double) vec_extract (vi, <n>)
3705 ;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP.
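;; A minimal illustration of the source being matched (assuming VSX and
;; 64-bit direct moves are enabled):
;;   vector int vi;
;;   double d = (double) vec_extract (vi, 2);
;; which splats the selected element into the right word position (when it
;; is not already there) and then converts it with xvcvsxwdp or xvcvuxwdp.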
3706 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
3707 [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
3710 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3711 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3712 (clobber (match_scratch:V4SI 3 "=v"))]
3713 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3718 rtx dest = operands[0];
3719 rtx src = operands[1];
3720 rtx element = operands[2];
3721 rtx v4si_tmp = operands[3];
3724 if (!BYTES_BIG_ENDIAN)
3725 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3727 /* If the value is in the correct position, we can avoid doing the VSPLT<x> instruction.  */
3729 value = INTVAL (element);
3732 if (GET_CODE (v4si_tmp) == SCRATCH)
3733 v4si_tmp = gen_reg_rtx (V4SImode);
3734 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3739 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
3743 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
3744 ;; where <type> is a floating point type supported by the hardware that is
3745 ;; not double.  First convert the value to double, and then to the desired type.
3747 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3748 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
3749 (any_float:VSX_EXTRACT_FL
3751 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3752 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3753 (clobber (match_scratch:V4SI 3 "=v"))
3754 (clobber (match_scratch:DF 4 "=wa"))]
3755 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3760 rtx dest = operands[0];
3761 rtx src = operands[1];
3762 rtx element = operands[2];
3763 rtx v4si_tmp = operands[3];
3764 rtx df_tmp = operands[4];
3767 if (!BYTES_BIG_ENDIAN)
3768 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3770 /* If the value is in the correct position, we can avoid doing the VSPLT<x> instruction.  */
3772 value = INTVAL (element);
3775 if (GET_CODE (v4si_tmp) == SCRATCH)
3776 v4si_tmp = gen_reg_rtx (V4SImode);
3777 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3782 if (GET_CODE (df_tmp) == SCRATCH)
3783 df_tmp = gen_reg_rtx (DFmode);
3785 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3787 if (<MODE>mode == SFmode)
3788 emit_insn (gen_truncdfsf2 (dest, df_tmp));
3789 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3790 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3791 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3792 && TARGET_FLOAT128_HW)
3793 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3794 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3795 emit_insn (gen_extenddfif2 (dest, df_tmp));
3796 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3797 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
3804 ;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>)
3805 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
3806 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3807 ;; vector short or vector unsigned short.
3808 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3809 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
3811 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3812 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3813 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3814 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3815 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3816 && TARGET_P9_VECTOR"
3818 "&& reload_completed"
3819 [(parallel [(set (match_dup 3)
3820 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3822 (parallel [(match_dup 2)])))
3823 (clobber (scratch:SI))])
3825 (sign_extend:DI (match_dup 3)))
3827 (float:<FL_CONV:MODE> (match_dup 4)))]
3829 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3831 [(set_attr "isa" "<VSisa>")])
3833 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
3834 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
3835 (unsigned_float:FL_CONV
3836 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3837 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3838 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3839 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3840 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3841 && TARGET_P9_VECTOR"
3843 "&& reload_completed"
3844 [(parallel [(set (match_dup 3)
3845 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3847 (parallel [(match_dup 2)])))
3848 (clobber (scratch:SI))])
3850 (float:<FL_CONV:MODE> (match_dup 4)))]
3852 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3854 [(set_attr "isa" "<VSisa>")])
3856 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
3857 (define_insn "vsx_set_<mode>_p9"
3858 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3859 (unspec:VSX_EXTRACT_I
3860 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3861 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3862 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3864 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3866 int ele = INTVAL (operands[3]);
3867 int nunits = GET_MODE_NUNITS (<MODE>mode);
3869 if (!BYTES_BIG_ENDIAN)
3870 ele = nunits - 1 - ele;
3872 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3873 if (<MODE>mode == V4SImode)
3874 return "xxinsertw %x0,%x2,%3";
3876 return "vinsert<wd> %0,%2,%3";
3878 [(set_attr "type" "vecperm")])
3880 (define_insn_and_split "vsx_set_v4sf_p9"
3881 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3883 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3884 (match_operand:SF 2 "gpc_reg_operand" "wa")
3885 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3887 (clobber (match_scratch:SI 4 "=&wa"))]
3888 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3890 "&& reload_completed"
3892 (unspec:V4SF [(match_dup 2)]
3893 UNSPEC_VSX_CVDPSPN))
3894 (parallel [(set (match_dup 4)
3895 (vec_select:SI (match_dup 6)
3896 (parallel [(match_dup 7)])))
3897 (clobber (scratch:SI))])
3899 (unspec:V4SI [(match_dup 8)
3904 unsigned int tmp_regno = reg_or_subregno (operands[4]);
3906 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
3907 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
3908 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
3909 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3911 [(set_attr "type" "vecperm")
3912 (set_attr "length" "12")
3913 (set_attr "isa" "p9v")])
3915 ;; Special case of setting a V4SF element to 0.0f
3916 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
3917 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3919 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3920 (match_operand:SF 2 "zero_fp_constant" "j")
3921 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3923 (clobber (match_scratch:SI 4 "=&wa"))]
3924 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3926 "&& reload_completed"
3930 (unspec:V4SI [(match_dup 5)
3935 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3937 [(set_attr "type" "vecperm")
3938 (set_attr "length" "8")
3939 (set_attr "isa" "p9v")])
3941 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
3942 ;; that is in the default scalar position (1 for big endian, 2 for little
3943 ;; endian). We just need to do an xxinsertw since the element is in the
3944 ;; correct location.
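;; A minimal sketch of the source form being matched (hypothetical example,
;; assuming <altivec.h>); with n in the default scalar position only a single
;; xxinsertw should be needed:
;;
;;	#include <altivec.h>
;;
;;	vector float
;;	copy_elem (vector float v1, vector float v2)
;;	{
;;	  /* On little endian, element 2 of v2 is already in the scalar slot.  */
;;	  return vec_insert (vec_extract (v2, 2), v1, 0);
;;	}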
3946 (define_insn "*vsx_insert_extract_v4sf_p9"
3947 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3949 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3950 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
3952 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
3953 (match_operand:QI 4 "const_0_to_3_operand" "n")]
3955 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
3956 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
3958 int ele = INTVAL (operands[4]);
3960 if (!BYTES_BIG_ENDIAN)
3961 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
3963 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
3964 return "xxinsertw %x0,%x2,%4";
3966 [(set_attr "type" "vecperm")])
3968 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
3969 ;; that is in the default scalar position (1 for big endian, 2 for little
3970 ;; endian).  Do the insert/extract as integer (V4SI) operations to avoid the float conversions.
3972 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
3973 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3975 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3976 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
3978 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
3979 (match_operand:QI 4 "const_0_to_3_operand" "n")]
3981 (clobber (match_scratch:SI 5 "=&wa"))]
3982 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
3983 && TARGET_P9_VECTOR && TARGET_POWERPC64
3984 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
3987 [(parallel [(set (match_dup 5)
3988 (vec_select:SI (match_dup 6)
3989 (parallel [(match_dup 3)])))
3990 (clobber (scratch:SI))])
3992 (unspec:V4SI [(match_dup 8)
3997 if (GET_CODE (operands[5]) == SCRATCH)
3998 operands[5] = gen_reg_rtx (SImode);
4000 operands[6] = gen_lowpart (V4SImode, operands[2]);
4001 operands[7] = gen_lowpart (V4SImode, operands[0]);
4002 operands[8] = gen_lowpart (V4SImode, operands[1]);
4004 [(set_attr "type" "vecperm")
4005 (set_attr "isa" "p9v")])
4007 ;; Expanders for builtins
4008 (define_expand "vsx_mergel_<mode>"
4009 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4010 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4011 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4012 "VECTOR_MEM_VSX_P (<MODE>mode)"
4014 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4015 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4016 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4017 emit_insn (gen_rtx_SET (operands[0], x));
4021 (define_expand "vsx_mergeh_<mode>"
4022 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4023 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4024 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4025 "VECTOR_MEM_VSX_P (<MODE>mode)"
4027 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4028 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4029 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4030 emit_insn (gen_rtx_SET (operands[0], x));
4035 ;; We separate the register splat insn from the memory splat insn to force the
4036 ;; register allocator to generate the indexed form of the SPLAT when it is
4037 ;; given an offsettable memory reference. Otherwise, if the register and
4038 ;; memory insns were combined into a single insn, the register allocator will
4039 ;; load the value into a register, and then do a double word permute.
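;; For orientation, a typical splat that this expander handles (a sketch under
;; the assumption that <altivec.h> is available, not code from this file):
;;
;;	#include <altivec.h>
;;
;;	vector double
;;	splat_scalar (double x)
;;	{
;;	  return vec_splats (x);
;;	}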
4040 (define_expand "vsx_splat_<mode>"
4041 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4042 (vec_duplicate:VSX_D
4043 (match_operand:<VS_scalar> 1 "input_operand")))]
4044 "VECTOR_MEM_VSX_P (<MODE>mode)"
4046 rtx op1 = operands[1];
4048 operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
4049 else if (!REG_P (op1))
4050 op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
4053 (define_insn "vsx_splat_<mode>_reg"
4054 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
4055 (vec_duplicate:VSX_D
4056 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")))]
4057 "VECTOR_MEM_VSX_P (<MODE>mode)"
4059 xxpermdi %x0,%x1,%x1,0
4061 [(set_attr "type" "vecperm")])
4063 (define_insn "vsx_splat_<mode>_mem"
4064 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4065 (vec_duplicate:VSX_D
4066 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4067 "VECTOR_MEM_VSX_P (<MODE>mode)"
4069 [(set_attr "type" "vecload")])
4071 ;; V4SI splat support
4072 (define_insn "vsx_splat_v4si"
4073 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4075 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4080 [(set_attr "type" "vecperm,vecload")])
4082 ;; SImode is not currently allowed in vector registers. This pattern
4083 ;; allows us to use direct move to get the value in a vector register
4084 ;; so that we can use XXSPLTW
4085 (define_insn "vsx_splat_v4si_di"
4086 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4089 (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
4090 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4094 [(set_attr "type" "vecperm")
4095 (set_attr "isa" "p8v,*")])
4097 ;; V4SF splat (ISA 3.0)
4098 (define_insn_and_split "vsx_splat_v4sf"
4099 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4101 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
4107 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4109 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4111 (unspec:V4SF [(match_dup 0)
4112 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4114 [(set_attr "type" "vecload,vecperm,mftgpr")
4115 (set_attr "length" "*,8,*")
4116 (set_attr "isa" "*,p8v,*")])
4118 ;; V4SF/V4SI splat from a vector element
4119 (define_insn "vsx_xxspltw_<mode>"
4120 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4121 (vec_duplicate:VSX_W
4122 (vec_select:<VS_scalar>
4123 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4125 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4126 "VECTOR_MEM_VSX_P (<MODE>mode)"
4128 if (!BYTES_BIG_ENDIAN)
4129 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4131 return "xxspltw %x0,%x1,%2";
4133 [(set_attr "type" "vecperm")])
4135 (define_insn "vsx_xxspltw_<mode>_direct"
4136 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4137 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
4138 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4139 UNSPEC_VSX_XXSPLTW))]
4140 "VECTOR_MEM_VSX_P (<MODE>mode)"
4141 "xxspltw %x0,%x1,%2"
4142 [(set_attr "type" "vecperm")])
4144 ;; V16QI/V8HI splat support on ISA 2.07
4145 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4146 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4147 (vec_duplicate:VSX_SPLAT_I
4148 (truncate:<VS_scalar>
4149 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4150 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4151 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4152 [(set_attr "type" "vecperm")])
4154 ;; V2DF/V2DI splat for use by vec_splat builtin
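;; A hypothetical source-level use of the vec_splat builtin mentioned above
;; (assuming <altivec.h>):
;;
;;	#include <altivec.h>
;;
;;	vector double
;;	splat_elem1 (vector double v)
;;	{
;;	  return vec_splat (v, 1);
;;	}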
4155 (define_insn "vsx_xxspltd_<mode>"
4156 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4157 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4158 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4159 UNSPEC_VSX_XXSPLTD))]
4160 "VECTOR_MEM_VSX_P (<MODE>mode)"
4162 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4163 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4164 return "xxpermdi %x0,%x1,%x1,0";
4166 return "xxpermdi %x0,%x1,%x1,3";
4168 [(set_attr "type" "vecperm")])
4170 ;; V4SF/V4SI interleave
4171 (define_insn "vsx_xxmrghw_<mode>"
4172 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4174 (vec_concat:<VS_double>
4175 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4176 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4177 (parallel [(const_int 0) (const_int 4)
4178 (const_int 1) (const_int 5)])))]
4179 "VECTOR_MEM_VSX_P (<MODE>mode)"
4181 if (BYTES_BIG_ENDIAN)
4182 return "xxmrghw %x0,%x1,%x2";
4184 return "xxmrglw %x0,%x2,%x1";
4186 [(set_attr "type" "vecperm")])
4188 (define_insn "vsx_xxmrglw_<mode>"
4189 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4191 (vec_concat:<VS_double>
4192 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4193 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4194 (parallel [(const_int 2) (const_int 6)
4195 (const_int 3) (const_int 7)])))]
4196 "VECTOR_MEM_VSX_P (<MODE>mode)"
4198 if (BYTES_BIG_ENDIAN)
4199 return "xxmrglw %x0,%x1,%x2";
4201 return "xxmrghw %x0,%x2,%x1";
4203 [(set_attr "type" "vecperm")])
4205 ;; Shift left double by word immediate
4206 (define_insn "vsx_xxsldwi_<mode>"
4207 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
4208 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
4209 (match_operand:VSX_L 2 "vsx_register_operand" "wa")
4210 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4212 "VECTOR_MEM_VSX_P (<MODE>mode)"
4213 "xxsldwi %x0,%x1,%x2,%3"
4214 [(set_attr "type" "vecperm")
4215 (set_attr "isa" "<VSisa>")])
4218 ;; Vector reduction insns and splitters
4220 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4221 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
4225 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4226 (parallel [(const_int 1)]))
4229 (parallel [(const_int 0)])))
4231 (clobber (match_scratch:V2DF 2 "=0,&wa"))]
4232 "VECTOR_UNIT_VSX_P (V2DFmode)"
4237 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4238 ? gen_reg_rtx (V2DFmode)
4240 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4241 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4244 [(set_attr "length" "8")
4245 (set_attr "type" "veccomplex")])
4247 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4248 [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
4250 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4251 (match_operand:V4SF 1 "vfloat_operand" "wa")))
4252 (clobber (match_scratch:V4SF 2 "=&wa"))
4253 (clobber (match_scratch:V4SF 3 "=&wa"))]
4254 "VECTOR_UNIT_VSX_P (V4SFmode)"
4259 rtx op0 = operands[0];
4260 rtx op1 = operands[1];
4261 rtx tmp2, tmp3, tmp4;
4263 if (can_create_pseudo_p ())
4265 tmp2 = gen_reg_rtx (V4SFmode);
4266 tmp3 = gen_reg_rtx (V4SFmode);
4267 tmp4 = gen_reg_rtx (V4SFmode);
4276 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4277 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4278 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4279 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4282 [(set_attr "length" "16")
4283 (set_attr "type" "veccomplex")])
4285 ;; Combiner patterns with the vector reduction patterns that know we can get
4286 ;; to the top element of the V2DF array without doing an extract.
4288 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4289 [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
4294 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4295 (parallel [(const_int 1)]))
4298 (parallel [(const_int 0)])))
4300 (parallel [(const_int 1)])))
4301 (clobber (match_scratch:DF 2 "=0,&wa"))]
4302 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4307 rtx hi = gen_highpart (DFmode, operands[1]);
4308 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4309 ? gen_reg_rtx (DFmode)
4312 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4313 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4316 [(set_attr "length" "8")
4317 (set_attr "type" "veccomplex")])
4319 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4320 [(set (match_operand:SF 0 "vfloat_operand" "=f")
4323 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4324 (match_operand:V4SF 1 "vfloat_operand" "wa"))
4325 (parallel [(const_int 3)])))
4326 (clobber (match_scratch:V4SF 2 "=&wa"))
4327 (clobber (match_scratch:V4SF 3 "=&wa"))
4328 (clobber (match_scratch:V4SF 4 "=0"))]
4329 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4334 rtx op0 = operands[0];
4335 rtx op1 = operands[1];
4336 rtx tmp2, tmp3, tmp4, tmp5;
4338 if (can_create_pseudo_p ())
4340 tmp2 = gen_reg_rtx (V4SFmode);
4341 tmp3 = gen_reg_rtx (V4SFmode);
4342 tmp4 = gen_reg_rtx (V4SFmode);
4343 tmp5 = gen_reg_rtx (V4SFmode);
4353 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4354 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4355 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4356 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4357 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4360 [(set_attr "length" "20")
4361 (set_attr "type" "veccomplex")])
4364 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
4366 [(set (match_operand:P 0 "base_reg_operand")
4367 (match_operand:P 1 "short_cint_operand"))
4368 (set (match_operand:VSX_M 2 "vsx_register_operand")
4369 (mem:VSX_M (plus:P (match_dup 0)
4370 (match_operand:P 3 "int_reg_operand"))))]
4371 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4372 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4373 [(set_attr "length" "8")
4374 (set_attr "type" "vecload")])
4377 [(set (match_operand:P 0 "base_reg_operand")
4378 (match_operand:P 1 "short_cint_operand"))
4379 (set (match_operand:VSX_M 2 "vsx_register_operand")
4380 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4382 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4383 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4384 [(set_attr "length" "8")
4385 (set_attr "type" "vecload")])
4388 ;; ISA 3.0 vector extend sign support
4390 (define_insn "vsx_sign_extend_qi_<mode>"
4391 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4393 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4394 UNSPEC_VSX_SIGN_EXTEND))]
4397 [(set_attr "type" "vecexts")])
4399 (define_insn "vsx_sign_extend_hi_<mode>"
4400 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4402 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4403 UNSPEC_VSX_SIGN_EXTEND))]
4406 [(set_attr "type" "vecexts")])
4408 (define_insn "*vsx_sign_extend_si_v2di"
4409 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4410 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4411 UNSPEC_VSX_SIGN_EXTEND))]
4414 [(set_attr "type" "vecexts")])
4417 ;; ISA 3.0 Binary Floating-Point Support
4419 ;; VSX Scalar Extract Exponent Quad-Precision
4420 (define_insn "xsxexpqp_<mode>"
4421 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4422 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4423 UNSPEC_VSX_SXEXPDP))]
4426 [(set_attr "type" "vecmove")])
4428 ;; VSX Scalar Extract Exponent Double-Precision
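;; A hedged sketch of how this is typically reached from source; the overloaded
;; built-in name scalar_extract_exp and the <altivec.h> requirement are
;; assumptions, not stated in this file:
;;
;;	#include <altivec.h>
;;
;;	unsigned int
;;	exponent_bits (double x)
;;	{
;;	  return scalar_extract_exp (x);
;;	}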
4429 (define_insn "xsxexpdp"
4430 [(set (match_operand:DI 0 "register_operand" "=r")
4431 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4432 UNSPEC_VSX_SXEXPDP))]
4433 "TARGET_P9_VECTOR && TARGET_64BIT"
4435 [(set_attr "type" "integer")])
4437 ;; VSX Scalar Extract Significand Quad-Precision
4438 (define_insn "xsxsigqp_<mode>"
4439 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4440 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4444 [(set_attr "type" "vecmove")])
4446 ;; VSX Scalar Extract Significand Double-Precision
4447 (define_insn "xsxsigdp"
4448 [(set (match_operand:DI 0 "register_operand" "=r")
4449 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4451 "TARGET_P9_VECTOR && TARGET_64BIT"
4453 [(set_attr "type" "integer")])
4455 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4456 (define_insn "xsiexpqpf_<mode>"
4457 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4459 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4460 (match_operand:DI 2 "altivec_register_operand" "v")]
4461 UNSPEC_VSX_SIEXPQP))]
4464 [(set_attr "type" "vecmove")])
4466 ;; VSX Scalar Insert Exponent Quad-Precision
4467 (define_insn "xsiexpqp_<mode>"
4468 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4469 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4470 (match_operand:DI 2 "altivec_register_operand" "v")]
4471 UNSPEC_VSX_SIEXPQP))]
4474 [(set_attr "type" "vecmove")])
4476 ;; VSX Scalar Insert Exponent Double-Precision
4477 (define_insn "xsiexpdp"
4478 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4479 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4480 (match_operand:DI 2 "register_operand" "r")]
4481 UNSPEC_VSX_SIEXPDP))]
4482 "TARGET_P9_VECTOR && TARGET_64BIT"
4483 "xsiexpdp %x0,%1,%2"
4484 [(set_attr "type" "fpsimple")])
4486 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4487 (define_insn "xsiexpdpf"
4488 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4489 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4490 (match_operand:DI 2 "register_operand" "r")]
4491 UNSPEC_VSX_SIEXPDP))]
4492 "TARGET_P9_VECTOR && TARGET_64BIT"
4493 "xsiexpdp %x0,%1,%2"
4494 [(set_attr "type" "fpsimple")])
4496 ;; VSX Scalar Compare Exponents Double-Precision
4497 (define_expand "xscmpexpdp_<code>"
4501 [(match_operand:DF 1 "vsx_register_operand" "wa")
4502 (match_operand:DF 2 "vsx_register_operand" "wa")]
4503 UNSPEC_VSX_SCMPEXPDP)
4505 (set (match_operand:SI 0 "register_operand" "=r")
4506 (CMP_TEST:SI (match_dup 3)
4510 operands[3] = gen_reg_rtx (CCFPmode);
4513 (define_insn "*xscmpexpdp"
4514 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4516 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4517 (match_operand:DF 2 "vsx_register_operand" "wa")]
4518 UNSPEC_VSX_SCMPEXPDP)
4519 (match_operand:SI 3 "zero_constant" "j")))]
4521 "xscmpexpdp %0,%x1,%x2"
4522 [(set_attr "type" "fpcompare")])
4524 ;; VSX Scalar Compare Exponents Quad-Precision
4525 (define_expand "xscmpexpqp_<code>_<mode>"
4529 [(match_operand:IEEE128 1 "vsx_register_operand" "v")
4530 (match_operand:IEEE128 2 "vsx_register_operand" "v")]
4531 UNSPEC_VSX_SCMPEXPQP)
4533 (set (match_operand:SI 0 "register_operand" "=r")
4534 (CMP_TEST:SI (match_dup 3)
4538 operands[3] = gen_reg_rtx (CCFPmode);
4541 (define_insn "*xscmpexpqp"
4542 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4544 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4545 (match_operand:IEEE128 2 "altivec_register_operand" "v")]
4546 UNSPEC_VSX_SCMPEXPQP)
4547 (match_operand:SI 3 "zero_constant" "j")))]
4549 "xscmpexpqp %0,%1,%2"
4550 [(set_attr "type" "fpcompare")])
4552 ;; VSX Scalar Test Data Class Quad-Precision
4553 ;; (Expansion for scalar_test_data_class (__ieee128, int))
4554 ;; (Has side effect of setting the lt bit if operand 1 is negative,
4555 ;; setting the eq bit if any of the conditions tested by operand 2
4556 ;; are satisfied, and clearing the gt and unordered bits to zero.)
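;; A hedged source-level sketch; the built-in name comes from the comment
;; above, while the <altivec.h> requirement and the particular condition mask
;; (an arbitrary illustrative value) are assumptions:
;;
;;	#include <altivec.h>
;;
;;	int
;;	test_class_qp (__ieee128 x)
;;	{
;;	  return scalar_test_data_class (x, 0x40);
;;	}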
4557 (define_expand "xststdcqp_<mode>"
4561 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4562 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4565 (set (match_operand:SI 0 "register_operand" "=r")
4566 (eq:SI (match_dup 3)
4570 operands[3] = gen_reg_rtx (CCFPmode);
4573 ;; VSX Scalar Test Data Class Double- and Single-Precision
4574 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
4575 ;; if any of the conditions tested by operand 2 are satisfied.
4576 ;; The gt and unordered bits are cleared to zero.)
4577 (define_expand "xststdc<sd>p"
4581 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4582 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4585 (set (match_operand:SI 0 "register_operand" "=r")
4586 (eq:SI (match_dup 3)
4590 operands[3] = gen_reg_rtx (CCFPmode);
4591 operands[4] = CONST0_RTX (SImode);
4594 ;; The VSX Scalar Test Negative Quad-Precision
4595 (define_expand "xststdcnegqp_<mode>"
4599 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4603 (set (match_operand:SI 0 "register_operand" "=r")
4604 (lt:SI (match_dup 2)
4608 operands[2] = gen_reg_rtx (CCFPmode);
4611 ;; The VSX Scalar Test Negative Double- and Single-Precision
4612 (define_expand "xststdcneg<sd>p"
4616 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4620 (set (match_operand:SI 0 "register_operand" "=r")
4621 (lt:SI (match_dup 2)
4625 operands[2] = gen_reg_rtx (CCFPmode);
4626 operands[3] = CONST0_RTX (SImode);
4629 (define_insn "*xststdcqp_<mode>"
4630 [(set (match_operand:CCFP 0 "" "=y")
4633 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4634 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4638 "xststdcqp %0,%1,%2"
4639 [(set_attr "type" "fpcompare")])
4641 (define_insn "*xststdc<sd>p"
4642 [(set (match_operand:CCFP 0 "" "=y")
4644 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4645 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4647 (match_operand:SI 3 "zero_constant" "j")))]
4649 "xststdc<sd>p %0,%x1,%2"
4650 [(set_attr "type" "fpcompare")])
4652 ;; VSX Vector Extract Exponent Double and Single Precision
4653 (define_insn "xvxexp<sd>p"
4654 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4656 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4659 "xvxexp<sd>p %x0,%x1"
4660 [(set_attr "type" "vecsimple")])
4662 ;; VSX Vector Extract Significand Double and Single Precision
4663 (define_insn "xvxsig<sd>p"
4664 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4666 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4669 "xvxsig<sd>p %x0,%x1"
4670 [(set_attr "type" "vecsimple")])
4672 ;; VSX Vector Insert Exponent Double and Single Precision
4673 (define_insn "xviexp<sd>p"
4674 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4676 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4677 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4680 "xviexp<sd>p %x0,%x1,%x2"
4681 [(set_attr "type" "vecsimple")])
4683 ;; VSX Vector Test Data Class Double and Single Precision
4684 ;; The corresponding elements of the result vector are all ones
4685 ;; if any of the conditions tested by operand 3 are satisfied.
4686 (define_insn "xvtstdc<sd>p"
4687 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4689 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4690 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4691 UNSPEC_VSX_VTSTDC))]
4693 "xvtstdc<sd>p %x0,%x1,%2"
4694 [(set_attr "type" "vecsimple")])
4696 ;; ISA 3.0 String Operations Support
4698 ;; Compare vectors producing a vector result and a predicate, setting CR6
4699 ;; to indicate a combined status. This pattern matches v16qi, v8hi, and
4700 ;; v4si modes. It does not match v2df, v4sf, or v2di modes. There's no
4701 ;; need to match v4sf, v2df, or v2di modes because those are expanded
4702 ;; to use Power8 instructions.
4703 (define_insn "*vsx_ne_<mode>_p"
4704 [(set (reg:CC CR6_REGNO)
4706 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4707 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4709 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4710 (ne:VSX_EXTRACT_I (match_dup 1)
4713 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4714 [(set_attr "type" "vecsimple")])
4716 (define_insn "*vector_nez_<mode>_p"
4717 [(set (reg:CC CR6_REGNO)
4718 (unspec:CC [(unspec:VI
4719 [(match_operand:VI 1 "gpc_reg_operand" "v")
4720 (match_operand:VI 2 "gpc_reg_operand" "v")]
4723 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4724 (unspec:VI [(match_dup 1)
4728 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4729 [(set_attr "type" "vecsimple")])
4731 ;; Return first position of match between vectors using natural order
4732 ;; for both LE and BE execution modes.
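;; A hedged sketch of a source-level use; the overloaded built-in name
;; vec_first_match_index is assumed to be the usual way to reach this expander:
;;
;;	#include <altivec.h>
;;
;;	unsigned int
;;	first_match (vector unsigned char a, vector unsigned char b)
;;	{
;;	  return vec_first_match_index (a, b);
;;	}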
4733 (define_expand "first_match_index_<mode>"
4734 [(match_operand:SI 0 "register_operand")
4735 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4736 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4737 UNSPEC_VSX_FIRST_MATCH_INDEX)]
4742 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4743 rtx not_result = gen_reg_rtx (<MODE>mode);
4745 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4747 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4749 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4751 if (<MODE>mode == V16QImode)
4753 if (!BYTES_BIG_ENDIAN)
4754 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4756 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
4760 rtx tmp = gen_reg_rtx (SImode);
4761 if (!BYTES_BIG_ENDIAN)
4762 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4764 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
4765 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4770 ;; Return first position of match between vectors or end of string (EOS) using
4771 ;; natural element order for both LE and BE execution modes.
4772 (define_expand "first_match_or_eos_index_<mode>"
4773 [(match_operand:SI 0 "register_operand")
4774 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4775 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4776 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4780 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4781 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4782 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4783 rtx and_result = gen_reg_rtx (<MODE>mode);
4784 rtx result = gen_reg_rtx (<MODE>mode);
4785 rtx vzero = gen_reg_rtx (<MODE>mode);
4787 /* Vector with zeros in elements that correspond to zeros in operands. */
4788 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4789 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4790 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4791 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4793 /* Vector with ones in elements that do not match.  */
4794 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4797 /* Create vector with ones in elements where there was a zero in one of
4798 the source elements or where the elements match.  */
4799 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
4800 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4802 if (<MODE>mode == V16QImode)
4804 if (!BYTES_BIG_ENDIAN)
4805 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4807 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4811 rtx tmp = gen_reg_rtx (SImode);
4812 if (!BYTES_BIG_ENDIAN)
4813 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4815 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4816 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4821 ;; Return first position of mismatch between vectors using natural
4822 ;; element order for both LE and BE execution modes.
4823 (define_expand "first_mismatch_index_<mode>"
4824 [(match_operand:SI 0 "register_operand")
4825 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4826 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4827 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4831 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4833 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4835 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4837 if (<MODE>mode == V16QImode)
4839 if (!BYTES_BIG_ENDIAN)
4840 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4842 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
4846 rtx tmp = gen_reg_rtx (SImode);
4847 if (!BYTES_BIG_ENDIAN)
4848 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4850 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
4851 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4856 ;; Return first position of mismatch between vectors or end of string (EOS)
4857 ;; using natural element order for both LE and BE execution modes.
4858 (define_expand "first_mismatch_or_eos_index_<mode>"
4859 [(match_operand:SI 0 "register_operand")
4860 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4861 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4862 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4866 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4867 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4868 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4869 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4870 rtx and_result = gen_reg_rtx (<MODE>mode);
4871 rtx result = gen_reg_rtx (<MODE>mode);
4872 rtx vzero = gen_reg_rtx (<MODE>mode);
4874 /* Vector with zeros in elements that correspond to zeros in operands. */
4875 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4877 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4878 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4879 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4881 /* Vector with ones in elements that match.  */
4882 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4884 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4886 /* Create vector with ones in elements where there was a zero in one of
4887 the source elements or the elements did not match. */
4888 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
4889 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4891 if (<MODE>mode == V16QImode)
4893 if (!BYTES_BIG_ENDIAN)
4894 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4896 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4900 rtx tmp = gen_reg_rtx (SImode);
4901 if (!BYTES_BIG_ENDIAN)
4902 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4904 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4905 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4910 ;; Load VSX Vector with Length
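;; A hedged source-level sketch (assuming <altivec.h> and the vec_xl_len
;; built-in) of a length-controlled load that should expand through lxvl:
;;
;;	#include <altivec.h>
;;	#include <stddef.h>
;;
;;	vector unsigned char
;;	load_prefix (unsigned char *p, size_t n)
;;	{
;;	  return vec_xl_len (p, n);	/* load only the first n bytes */
;;	}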
4911 (define_expand "lxvl"
4913 (ashift:DI (match_operand:DI 2 "register_operand")
4915 (set (match_operand:V16QI 0 "vsx_register_operand")
4917 [(match_operand:DI 1 "gpc_reg_operand")
4918 (mem:V16QI (match_dup 1))
4921 "TARGET_P9_VECTOR && TARGET_64BIT"
4923 operands[3] = gen_reg_rtx (DImode);
4926 (define_insn "*lxvl"
4927 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4929 [(match_operand:DI 1 "gpc_reg_operand" "b")
4930 (mem:V16QI (match_dup 1))
4931 (match_operand:DI 2 "register_operand" "r")]
4933 "TARGET_P9_VECTOR && TARGET_64BIT"
4935 [(set_attr "type" "vecload")])
4937 (define_insn "lxvll"
4938 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4939 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
4940 (mem:V16QI (match_dup 1))
4941 (match_operand:DI 2 "register_operand" "r")]
4945 [(set_attr "type" "vecload")])
4947 ;; Expand for builtin xl_len_r
4948 (define_expand "xl_len_r"
4949 [(match_operand:V16QI 0 "vsx_register_operand")
4950 (match_operand:DI 1 "register_operand")
4951 (match_operand:DI 2 "register_operand")]
4954 rtx shift_mask = gen_reg_rtx (V16QImode);
4955 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
4956 rtx tmp = gen_reg_rtx (DImode);
4958 emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
4959 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
4960 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
4961 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
4966 (define_insn "stxvll"
4967 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
4968 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
4969 (mem:V16QI (match_dup 1))
4970 (match_operand:DI 2 "register_operand" "r")]
4974 [(set_attr "type" "vecstore")])
4976 ;; Store VSX Vector with Length
4977 (define_expand "stxvl"
4979 (ashift:DI (match_operand:DI 2 "register_operand")
4981 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
4983 [(match_operand:V16QI 0 "vsx_register_operand")
4984 (mem:V16QI (match_dup 1))
4987 "TARGET_P9_VECTOR && TARGET_64BIT"
4989 operands[3] = gen_reg_rtx (DImode);
4992 (define_insn "*stxvl"
4993 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
4995 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
4996 (mem:V16QI (match_dup 1))
4997 (match_operand:DI 2 "register_operand" "r")]
4999 "TARGET_P9_VECTOR && TARGET_64BIT"
5001 [(set_attr "type" "vecstore")])
5003 ;; Expand for builtin xst_len_r
5004 (define_expand "xst_len_r"
5005 [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5006 (match_operand:DI 1 "register_operand" "b")
5007 (match_operand:DI 2 "register_operand" "r")]
5010 rtx shift_mask = gen_reg_rtx (V16QImode);
5011 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5012 rtx tmp = gen_reg_rtx (DImode);
5014 emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5015 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5017 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5018 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5022 ;; Vector Compare Not Equal Byte (specified/not+eq:)
5023 (define_insn "vcmpneb"
5024 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5026 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5027 (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5030 [(set_attr "type" "vecsimple")])
5032 ;; Vector Compare Not Equal or Zero Byte
5033 (define_insn "vcmpnezb"
5034 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5036 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5037 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5041 [(set_attr "type" "vecsimple")])
5043 ;; Vector Compare Not Equal or Zero Byte predicate or record-form
5044 (define_insn "vcmpnezb_p"
5045 [(set (reg:CC CR6_REGNO)
5047 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5048 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5050 (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5056 "vcmpnezb. %0,%1,%2"
5057 [(set_attr "type" "vecsimple")])
5059 ;; Vector Compare Not Equal Half Word (specified/not+eq:)
5060 (define_insn "vcmpneh"
5061 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5063 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5064 (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5067 [(set_attr "type" "vecsimple")])
5069 ;; Vector Compare Not Equal or Zero Half Word
5070 (define_insn "vcmpnezh"
5071 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5072 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5073 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5077 [(set_attr "type" "vecsimple")])
5079 ;; Vector Compare Not Equal Word (specified/not+eq:)
5080 (define_insn "vcmpnew"
5081 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5083 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5084 (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5087 [(set_attr "type" "vecsimple")])
5089 ;; Vector Compare Not Equal or Zero Word
5090 (define_insn "vcmpnezw"
5091 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5092 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5093 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5097 [(set_attr "type" "vecsimple")])
5099 ;; Vector Count Leading Zero Least-Significant Bits Byte
5100 (define_insn "vclzlsbb_<mode>"
5101 [(set (match_operand:SI 0 "register_operand" "=r")
5103 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5107 [(set_attr "type" "vecsimple")])
5109 ;; Vector Count Trailing Zero Least-Significant Bits Byte
5110 (define_insn "vctzlsbb_<mode>"
5111 [(set (match_operand:SI 0 "register_operand" "=r")
5113 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5117 [(set_attr "type" "vecsimple")])
5119 ;; Vector Extract Unsigned Byte Left-Indexed
5120 (define_insn "vextublx"
5121 [(set (match_operand:SI 0 "register_operand" "=r")
5123 [(match_operand:SI 1 "register_operand" "r")
5124 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5128 [(set_attr "type" "vecsimple")])
5130 ;; Vector Extract Unsigned Byte Right-Indexed
5131 (define_insn "vextubrx"
5132 [(set (match_operand:SI 0 "register_operand" "=r")
5134 [(match_operand:SI 1 "register_operand" "r")
5135 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5139 [(set_attr "type" "vecsimple")])
5141 ;; Vector Extract Unsigned Half Word Left-Indexed
5142 (define_insn "vextuhlx"
5143 [(set (match_operand:SI 0 "register_operand" "=r")
5145 [(match_operand:SI 1 "register_operand" "r")
5146 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5150 [(set_attr "type" "vecsimple")])
5152 ;; Vector Extract Unsigned Half Word Right-Indexed
5153 (define_insn "vextuhrx"
5154 [(set (match_operand:SI 0 "register_operand" "=r")
5156 [(match_operand:SI 1 "register_operand" "r")
5157 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5161 [(set_attr "type" "vecsimple")])
5163 ;; Vector Extract Unsigned Word Left-Indexed
5164 (define_insn "vextuwlx"
5165 [(set (match_operand:SI 0 "register_operand" "=r")
5167 [(match_operand:SI 1 "register_operand" "r")
5168 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5172 [(set_attr "type" "vecsimple")])
5174 ;; Vector Extract Unsigned Word Right-Indexed
5175 (define_insn "vextuwrx"
5176 [(set (match_operand:SI 0 "register_operand" "=r")
5178 [(match_operand:SI 1 "register_operand" "r")
5179 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5183 [(set_attr "type" "vecsimple")])
5185 ;; Vector insert/extract word at arbitrary byte values. Note, the little
5186 ;; endian version needs to adjust the byte number, and the V4SI element in
5188 (define_insn "extract4b"
5189 [(set (match_operand:V2DI 0 "vsx_register_operand")
5190 (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5191 (match_operand:QI 2 "const_0_to_12_operand" "n")]
5192 UNSPEC_XXEXTRACTUW))]
5195 if (!BYTES_BIG_ENDIAN)
5196 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5198 return "xxextractuw %x0,%x1,%2";
5201 (define_expand "insert4b"
5202 [(set (match_operand:V16QI 0 "vsx_register_operand")
5203 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5204 (match_operand:V16QI 2 "vsx_register_operand")
5205 (match_operand:QI 3 "const_0_to_12_operand")]
5209 if (!BYTES_BIG_ENDIAN)
5211 rtx op1 = operands[1];
5212 rtx v4si_tmp = gen_reg_rtx (V4SImode);
5213 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5214 operands[1] = v4si_tmp;
5215 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5219 (define_insn "*insert4b_internal"
5220 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5221 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5222 (match_operand:V16QI 2 "vsx_register_operand" "0")
5223 (match_operand:QI 3 "const_0_to_12_operand" "n")]
5226 "xxinsertw %x0,%x1,%3"
5227 [(set_attr "type" "vecperm")])
5230 ;; Generate a vector of four 32-bit floats extracted from the left four
5231 ;; elements of an eight-element vector of 16-bit floats.
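;; A hedged sketch of a source-level use; the built-in name
;; vec_extract_fp32_from_shorth is assumed here:
;;
;;	#include <altivec.h>
;;
;;	vector float
;;	upconvert_high (vector unsigned short packed_f16)
;;	{
;;	  return vec_extract_fp32_from_shorth (packed_f16);
;;	}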
5232 (define_expand "vextract_fp_from_shorth"
5233 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5234 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5235 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5239 int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5240 int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5243 rtx mask = gen_reg_rtx (V16QImode);
5244 rtx tmp = gen_reg_rtx (V16QImode);
5247 for (i = 0; i < 16; i++)
5248 if (!BYTES_BIG_ENDIAN)
5249 rvals[i] = GEN_INT (vals_le[i]);
5251 rvals[i] = GEN_INT (vals_be[i]);
5253 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5254 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5255 src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5256 conversion instruction. */
5257 v = gen_rtvec_v (16, rvals);
5258 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5259 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5260 operands[1], mask));
5261 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5265 ;; Generate a vector of four 32-bit floats extracted from the right four
5266 ;; elements of an eight-element vector of 16-bit floats.
5267 (define_expand "vextract_fp_from_shortl"
5268 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5269 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5270 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5273 int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5274 int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5278 rtx mask = gen_reg_rtx (V16QImode);
5279 rtx tmp = gen_reg_rtx (V16QImode);
5282 for (i = 0; i < 16; i++)
5283 if (!BYTES_BIG_ENDIAN)
5284 rvals[i] = GEN_INT (vals_le[i]);
5286 rvals[i] = GEN_INT (vals_be[i]);
5288 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5289 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5290 src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5291 conversion instruction. */
5292 v = gen_rtvec_v (16, rvals);
5293 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5294 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5295 operands[1], mask));
5296 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5300 ;; Support for ISA 3.0 vector byte reverse
5302 ;; Swap all bytes within a vector
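;; A hedged source-level sketch of a full 128-bit byte reversal; the vec_revb
;; built-in and its vector __int128 form are assumptions, not stated here:
;;
;;	#include <altivec.h>
;;
;;	vector unsigned __int128
;;	reverse_bytes (vector unsigned __int128 v)
;;	{
;;	  return vec_revb (v);
;;	}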
5303 (define_insn "p9_xxbrq_v1ti"
5304 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5305 (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5308 [(set_attr "type" "vecperm")])
5310 (define_expand "p9_xxbrq_v16qi"
5311 [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5312 (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5315 rtx op0 = gen_reg_rtx (V1TImode);
5316 rtx op1 = gen_lowpart (V1TImode, operands[1]);
5317 emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5318 emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5322 ;; Swap all bytes in each 64-bit element
5323 (define_insn "p9_xxbrd_v2di"
5324 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5325 (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5328 [(set_attr "type" "vecperm")])
5330 (define_expand "p9_xxbrd_v2df"
5331 [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5332 (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5335 rtx op0 = gen_reg_rtx (V2DImode);
5336 rtx op1 = gen_lowpart (V2DImode, operands[1]);
5337 emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5338 emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5342 ;; Swap all bytes in each 32-bit element
5343 (define_insn "p9_xxbrw_v4si"
5344 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5345 (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5348 [(set_attr "type" "vecperm")])
5350 (define_expand "p9_xxbrw_v4sf"
5351 [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5352 (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5355 rtx op0 = gen_reg_rtx (V4SImode);
5356 rtx op1 = gen_lowpart (V4SImode, operands[1]);
5357 emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5358 emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5362 ;; Swap all bytes in each element of vector
5363 (define_expand "revb_<mode>"
5364 [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5365 (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5368 if (TARGET_P9_VECTOR)
5369 emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5372 /* Want to have the elements in reverse order relative
5373 to the endian mode in use, i.e. in LE mode, put elements in BE order.  */
5375 rtx sel = swap_endian_selector_for_mode (<MODE>mode);
5376 emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5383 ;; Reversing bytes in vector char is just a NOP.
5384 (define_expand "revb_v16qi"
5385 [(set (match_operand:V16QI 0 "vsx_register_operand")
5386 (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5389 emit_move_insn (operands[0], operands[1]);
5393 ;; Swap all bytes in each 16-bit element
5394 (define_insn "p9_xxbrh_v8hi"
5395 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5396 (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5399 [(set_attr "type" "vecperm")])
5402 ;; Operand numbers for the following peephole2
5404 [(SFBOOL_TMP_GPR 0) ;; GPR temporary
5405 (SFBOOL_TMP_VSX 1) ;; vector temporary
5406 (SFBOOL_MFVSR_D 2) ;; move to gpr dest
5407 (SFBOOL_MFVSR_A 3) ;; move to gpr src
5408 (SFBOOL_BOOL_D 4) ;; and/ior/xor dest
5409 (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1
5410 (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg2
5411 (SFBOOL_SHL_D 7) ;; shift left dest
5412 (SFBOOL_SHL_A 8) ;; shift left arg
5413 (SFBOOL_MTVSR_D 9) ;; move to vector dest
5414 (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode
5415 (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode
5416 (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode
5417 (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSR_D as V4SFmode
5419 ;; Attempt to optimize some common GLIBC operations using logical operations to
5420 ;; pick apart SFmode operations. For example, there is code from e_powf.c
5421 ;; after macro expansion that looks like:
5426 ;; } ieee_float_shape_type;
5432 ;; ieee_float_shape_type gf_u;
5433 ;; gf_u.value = (t1);
5434 ;; (is) = gf_u.word;
5438 ;; ieee_float_shape_type sf_u;
5439 ;; sf_u.word = (is & 0xfffff000);
5440 ;; (t1) = sf_u.value;
5444 ;; This would result in two direct move operations (convert to memory format,
5445 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5446 ;; scalar format). With this peephole, we eliminate the direct move to the
5447 ;; GPR, and instead move the integer mask value to the vector register after a
5448 ;; shift and do the VSX logical operation.
5450 ;; The insns for dealing with SFmode in GPR registers look like:
5451 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5453 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5455 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5457 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5459 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5461 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
5464 [(match_scratch:DI SFBOOL_TMP_GPR "r")
5465 (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5467 ;; MFVSRWZ (aka zero_extend)
5468 (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5470 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5472 ;; AND/IOR/XOR operation on int
5473 (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5474 (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5475 (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5478 (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5479 (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5483 (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5484 (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5486 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
5487 /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
5488    to compare registers when the modes are different.  */
5489 && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5490 && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5491 && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
5492 && (REG_P (operands[SFBOOL_BOOL_A2])
5493 || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5494 && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5495 || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5496 && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5497 || (REG_P (operands[SFBOOL_BOOL_A2])
5498 && REGNO (operands[SFBOOL_MFVSR_D])
5499 == REGNO (operands[SFBOOL_BOOL_A2])))
5500 && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5501 && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5502 || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5503 && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
5504 [(set (match_dup SFBOOL_TMP_GPR)
5505 (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
5508 (set (match_dup SFBOOL_TMP_VSX_DI)
5509 (match_dup SFBOOL_TMP_GPR))
5511 (set (match_dup SFBOOL_MTVSR_D_V4SF)
5512 (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
5513 (match_dup SFBOOL_TMP_VSX)))]
5515 rtx bool_a1 = operands[SFBOOL_BOOL_A1];
5516 rtx bool_a2 = operands[SFBOOL_BOOL_A2];
5517 int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
5518 int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
5519 int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
5520 int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
5522 if (CONST_INT_P (bool_a2))
5524 rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
5525 emit_move_insn (tmp_gpr, bool_a2);
5526 operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
5530 int regno_bool_a1 = REGNO (bool_a1);
5531 int regno_bool_a2 = REGNO (bool_a2);
5532 int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
5533 ? regno_bool_a2 : regno_bool_a1);
5534 operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
5537 operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
5538 operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
5539 operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);