2 ;; Copyright (C) 2009-2017 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Iterator for comparison types
22 (define_code_iterator CMP_TEST [eq lt gt unordered])
24 ;; Mode attribute for vector floate and floato conversions
25 (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
27 ;; Iterator for both scalar and vector floating point types supported by VSX
28 (define_mode_iterator VSX_B [DF V4SF V2DF])
30 ;; Iterator for the 2 64-bit vector types
31 (define_mode_iterator VSX_D [V2DF V2DI])
33 ;; Mode iterator to handle swapping words on little endian for the 128-bit
34 ;; types that go in a single vector register.  KF/TF are conditional on
;; FLOAT128_VECTOR_P, i.e. only present when IEEE 128-bit float is kept in
;; a vector register.
35 (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
36 (TF "FLOAT128_VECTOR_P (TFmode)")
40 ;; Iterator for 128-bit integer types that go in a single vector register.
41 (define_mode_iterator VSX_TI [TI V1TI])
43 ;; Iterator for the 2 32-bit vector types
44 (define_mode_iterator VSX_W [V4SF V4SI])
46 ;; Iterator for the DF types
47 (define_mode_iterator VSX_DF [V2DF DF])
49 ;; Iterator for vector floating point types supported by VSX
50 (define_mode_iterator VSX_F [V4SF V2DF])
52 ;; Iterator for logical types supported by VSX
;; (starts at V16QI; the KF/TF entries are again FLOAT128_VECTOR_P-gated).
53 (define_mode_iterator VSX_L [V16QI
61 (KF "FLOAT128_VECTOR_P (KFmode)")
62 (TF "FLOAT128_VECTOR_P (TFmode)")])
64 ;; Iterator for memory moves.
65 (define_mode_iterator VSX_M [V16QI
72 (KF "FLOAT128_VECTOR_P (KFmode)")
73 (TF "FLOAT128_VECTOR_P (TFmode)")
76 ;; Map into the appropriate load/store name based on the type
77 (define_mode_attr VSm [(V16QI "vw4")
89 ;; Map into the appropriate suffix based on the type
90 (define_mode_attr VSs [(V16QI "sp")
103 ;; Map the register class used
104 (define_mode_attr VSr [(V16QI "v")
118 ;; Map the register class used for float<->int conversions (floating point side)
119 ;; VSr2 is the preferred register class, VSr3 is any register class that will
121 (define_mode_attr VSr2 [(V2DF "wd")
129 (define_mode_attr VSr3 [(V2DF "wa")
137 ;; Map the register class for sp<->dp float conversions, destination
138 (define_mode_attr VSr4 [(SF "ws")
143 ;; Map the register class for sp<->dp float conversions, source
144 (define_mode_attr VSr5 [(SF "ws")
149 ;; The VSX register class that a type can occupy, even if it is not the
150 ;; preferred register class (VSr is the preferred register class that will get
152 (define_mode_attr VSa [(V16QI "wa")
166 ;; Same size integer type for floating point data
;; VSi gives the lower-case mode name (for insn name substitution) and VSI
;; the upper-case mode name (for mode substitution).
167 (define_mode_attr VSi [(V4SF "v4si")
171 (define_mode_attr VSI [(V4SF "V4SI")
175 ;; Word size for same size conversion
176 (define_mode_attr VSc [(V4SF "w")
180 ;; Map into either s or v, depending on whether this is a scalar or vector
182 (define_mode_attr VSv [(V16QI "v")
192 ;; Appropriate type for add ops (and other simple FP ops)
;; VStype_* map to the "type" attribute, VSfptype_* to the "fp_type"
;; attribute used for scheduling.
193 (define_mode_attr VStype_simple [(V2DF "vecdouble")
197 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
201 ;; Appropriate type for multiply ops
202 (define_mode_attr VStype_mul [(V2DF "vecdouble")
206 (define_mode_attr VSfptype_mul [(V2DF "fp_mul_d")
210 ;; Appropriate type for divide ops.
211 (define_mode_attr VStype_div [(V2DF "vecdiv")
215 (define_mode_attr VSfptype_div [(V2DF "fp_div_d")
219 ;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
221 (define_mode_attr VStype_sqrt [(V2DF "dsqrt")
225 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
229 ;; Iterator and modes for sp<->dp conversions
230 ;; Because scalar SF values are represented internally as double, use the
231 ;; V4SF type to represent this rather than SF.
232 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
;; Result mode, insn mnemonic and scheduling type for each sp<->dp pair.
234 (define_mode_attr VS_spdp_res [(DF "V4SF")
238 (define_mode_attr VS_spdp_insn [(DF "xscvdpsp")
242 (define_mode_attr VS_spdp_type [(DF "fp")
246 ;; Map the scalar mode for a vector type
247 (define_mode_attr VS_scalar [(V1TI "TI")
255 ;; Map to a double-sized vector mode
256 (define_mode_attr VS_double [(V4SI "V8SI")
262 ;; Map register class for 64-bit element in 128-bit vector for direct moves
264 (define_mode_attr VS_64dm [(V2DF "wk")
267 ;; Map register class for 64-bit element in 128-bit vector for normal register
269 (define_mode_attr VS_64reg [(V2DF "ws")
272 ;; Iterators for loading constants with xxspltib
273 (define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
274 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
276 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
277 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
278 ;; done on ISA 2.07 and not just ISA 3.0.
279 (define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
280 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
;; Element-width letter ("b" for byte, etc.) used in extract insn mnemonics.
282 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
286 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
287 ;; insert to validate the operand number.
288 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
289 (V8HI "const_0_to_7_operand")
290 (V4SI "const_0_to_3_operand")])
292 ;; Mode attribute to give the constraint for vector extract and insert
294 (define_mode_attr VSX_EX [(V16QI "v")
298 ;; Mode iterator for binary floating types other than double to
299 ;; optimize convert to that floating point type from an extract
300 ;; of an integer type
301 (define_mode_iterator VSX_EXTRACT_FL [SF
302 (IF "FLOAT128_2REG_P (IFmode)")
303 (KF "TARGET_FLOAT128_HW")
304 (TF "FLOAT128_2REG_P (TFmode)
305 || (FLOAT128_IEEE_P (TFmode)
306 && TARGET_FLOAT128_HW)")])
308 ;; Mode iterator for binary floating types that have a direct conversion
309 ;; from 64-bit integer to floating point
310 (define_mode_iterator FL_CONV [SF
312 (KF "TARGET_FLOAT128_HW")
313 (TF "TARGET_FLOAT128_HW
314 && FLOAT128_IEEE_P (TFmode)")])
316 ;; Iterator for the 2 short vector types to do a splat from an integer
317 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
319 ;; Mode attribute to give the count for the splat instruction to splat
320 ;; the value in the 64-bit integer slot
321 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
323 ;; Mode attribute to give the suffix for the splat instruction
324 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
326 ;; Constants for creating unspecs
;; UNSPEC_* names used by the VSX patterns in this file (conversion,
;; sign-extension and FP-from-short extraction operations are visible here).
327 (define_c_enum "unspec"
344 UNSPEC_VSX_UNS_FLOAT2
346 UNSPEC_VSX_UNS_FLOATE
348 UNSPEC_VSX_UNS_FLOATO
364 UNSPEC_VSX_XVCVDPSXDS
365 UNSPEC_VSX_XVCVDPUXDS
366 UNSPEC_VSX_SIGN_EXTEND
367 UNSPEC_VSX_XVCVSPSXWS
368 UNSPEC_VSX_XVCVSPSXDS
377 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
378 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
415 ;; The patterns for LE permuted loads and stores come before the general
416 ;; VSX moves so they match first.
;; Little-endian load of a 64-bit-element vector (V2DF/V2DI) without ISA 3.0:
;; splits into an lxvd2x-style load plus a doubleword swap (vec_select with
;; element order 1,0), using a scratch when a pseudo can be created.
417 (define_insn_and_split "*vsx_le_perm_load_<mode>"
418 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
419 (match_operand:VSX_D 1 "memory_operand" "Z"))]
420 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
422 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
426 (parallel [(const_int 1) (const_int 0)])))
430 (parallel [(const_int 1) (const_int 0)])))]
433 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
437 [(set_attr "type" "vecload")
438 (set_attr "length" "8")])
;; Same as above for 32-bit-element vectors (V4SF/V4SI); the word swap is
;; expressed as the element permutation 2,3,0,1.
440 (define_insn_and_split "*vsx_le_perm_load_<mode>"
441 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
442 (match_operand:VSX_W 1 "memory_operand" "Z"))]
443 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
445 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
449 (parallel [(const_int 2) (const_int 3)
450 (const_int 0) (const_int 1)])))
454 (parallel [(const_int 2) (const_int 3)
455 (const_int 0) (const_int 1)])))]
458 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
462 [(set_attr "type" "vecload")
463 (set_attr "length" "8")])
;; LE permuted load for V8HI: load plus halfword-pair swap (4,5,6,7,0,1,2,3).
465 (define_insn_and_split "*vsx_le_perm_load_v8hi"
466 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
467 (match_operand:V8HI 1 "memory_operand" "Z"))]
468 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
470 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
474 (parallel [(const_int 4) (const_int 5)
475 (const_int 6) (const_int 7)
476 (const_int 0) (const_int 1)
477 (const_int 2) (const_int 3)])))
481 (parallel [(const_int 4) (const_int 5)
482 (const_int 6) (const_int 7)
483 (const_int 0) (const_int 1)
484 (const_int 2) (const_int 3)])))]
487 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
491 [(set_attr "type" "vecload")
492 (set_attr "length" "8")])
;; LE permuted load for V16QI: load plus byte-octet swap (8..15,0..7).
494 (define_insn_and_split "*vsx_le_perm_load_v16qi"
495 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
496 (match_operand:V16QI 1 "memory_operand" "Z"))]
497 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
499 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
503 (parallel [(const_int 8) (const_int 9)
504 (const_int 10) (const_int 11)
505 (const_int 12) (const_int 13)
506 (const_int 14) (const_int 15)
507 (const_int 0) (const_int 1)
508 (const_int 2) (const_int 3)
509 (const_int 4) (const_int 5)
510 (const_int 6) (const_int 7)])))
514 (parallel [(const_int 8) (const_int 9)
515 (const_int 10) (const_int 11)
516 (const_int 12) (const_int 13)
517 (const_int 14) (const_int 15)
518 (const_int 0) (const_int 1)
519 (const_int 2) (const_int 3)
520 (const_int 4) (const_int 5)
521 (const_int 6) (const_int 7)])))]
524 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
528 [(set_attr "type" "vecload")
529 (set_attr "length" "8")])
;; LE permuted store for V2DF/V2DI.  Note the "+" on operand 1: the
;; post-reload split below temporarily permutes the source register in
;; place, so it is read-write.
531 (define_insn "*vsx_le_perm_store_<mode>"
532 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
533 (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
534 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
536 [(set_attr "type" "vecstore")
537 (set_attr "length" "12")])
;; Pre-reload split: swap into a fresh scratch, then store the swapped value.
540 [(set (match_operand:VSX_D 0 "memory_operand" "")
541 (match_operand:VSX_D 1 "vsx_register_operand" ""))]
542 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
546 (parallel [(const_int 1) (const_int 0)])))
550 (parallel [(const_int 1) (const_int 0)])))]
552 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
556 ;; The post-reload split requires that we re-permute the source
557 ;; register in case it is still live.
559 [(set (match_operand:VSX_D 0 "memory_operand" "")
560 (match_operand:VSX_D 1 "vsx_register_operand" ""))]
561 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
565 (parallel [(const_int 1) (const_int 0)])))
569 (parallel [(const_int 1) (const_int 0)])))
573 (parallel [(const_int 1) (const_int 0)])))]
;; LE permuted store for V4SF/V4SI, with the same pre-/post-reload split
;; scheme using the word permutation 2,3,0,1.
576 (define_insn "*vsx_le_perm_store_<mode>"
577 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
578 (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
579 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
581 [(set_attr "type" "vecstore")
582 (set_attr "length" "12")])
585 [(set (match_operand:VSX_W 0 "memory_operand" "")
586 (match_operand:VSX_W 1 "vsx_register_operand" ""))]
587 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
591 (parallel [(const_int 2) (const_int 3)
592 (const_int 0) (const_int 1)])))
596 (parallel [(const_int 2) (const_int 3)
597 (const_int 0) (const_int 1)])))]
599 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
603 ;; The post-reload split requires that we re-permute the source
604 ;; register in case it is still live.
606 [(set (match_operand:VSX_W 0 "memory_operand" "")
607 (match_operand:VSX_W 1 "vsx_register_operand" ""))]
608 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
612 (parallel [(const_int 2) (const_int 3)
613 (const_int 0) (const_int 1)])))
617 (parallel [(const_int 2) (const_int 3)
618 (const_int 0) (const_int 1)])))
622 (parallel [(const_int 2) (const_int 3)
623 (const_int 0) (const_int 1)])))]
;; LE permuted store for V8HI (halfword permutation 4..7,0..3); same
;; read-write source ("+wa") and pre-/post-reload split scheme as above.
626 (define_insn "*vsx_le_perm_store_v8hi"
627 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
628 (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
629 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
631 [(set_attr "type" "vecstore")
632 (set_attr "length" "12")])
635 [(set (match_operand:V8HI 0 "memory_operand" "")
636 (match_operand:V8HI 1 "vsx_register_operand" ""))]
637 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
641 (parallel [(const_int 4) (const_int 5)
642 (const_int 6) (const_int 7)
643 (const_int 0) (const_int 1)
644 (const_int 2) (const_int 3)])))
648 (parallel [(const_int 4) (const_int 5)
649 (const_int 6) (const_int 7)
650 (const_int 0) (const_int 1)
651 (const_int 2) (const_int 3)])))]
653 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
657 ;; The post-reload split requires that we re-permute the source
658 ;; register in case it is still live.
660 [(set (match_operand:V8HI 0 "memory_operand" "")
661 (match_operand:V8HI 1 "vsx_register_operand" ""))]
662 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
666 (parallel [(const_int 4) (const_int 5)
667 (const_int 6) (const_int 7)
668 (const_int 0) (const_int 1)
669 (const_int 2) (const_int 3)])))
673 (parallel [(const_int 4) (const_int 5)
674 (const_int 6) (const_int 7)
675 (const_int 0) (const_int 1)
676 (const_int 2) (const_int 3)])))
680 (parallel [(const_int 4) (const_int 5)
681 (const_int 6) (const_int 7)
682 (const_int 0) (const_int 1)
683 (const_int 2) (const_int 3)])))]
;; LE permuted store for V16QI (byte permutation 8..15,0..7).
686 (define_insn "*vsx_le_perm_store_v16qi"
687 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
688 (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
689 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
691 [(set_attr "type" "vecstore")
692 (set_attr "length" "12")])
695 [(set (match_operand:V16QI 0 "memory_operand" "")
696 (match_operand:V16QI 1 "vsx_register_operand" ""))]
697 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
701 (parallel [(const_int 8) (const_int 9)
702 (const_int 10) (const_int 11)
703 (const_int 12) (const_int 13)
704 (const_int 14) (const_int 15)
705 (const_int 0) (const_int 1)
706 (const_int 2) (const_int 3)
707 (const_int 4) (const_int 5)
708 (const_int 6) (const_int 7)])))
712 (parallel [(const_int 8) (const_int 9)
713 (const_int 10) (const_int 11)
714 (const_int 12) (const_int 13)
715 (const_int 14) (const_int 15)
716 (const_int 0) (const_int 1)
717 (const_int 2) (const_int 3)
718 (const_int 4) (const_int 5)
719 (const_int 6) (const_int 7)])))]
721 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
725 ;; The post-reload split requires that we re-permute the source
726 ;; register in case it is still live.
728 [(set (match_operand:V16QI 0 "memory_operand" "")
729 (match_operand:V16QI 1 "vsx_register_operand" ""))]
730 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
734 (parallel [(const_int 8) (const_int 9)
735 (const_int 10) (const_int 11)
736 (const_int 12) (const_int 13)
737 (const_int 14) (const_int 15)
738 (const_int 0) (const_int 1)
739 (const_int 2) (const_int 3)
740 (const_int 4) (const_int 5)
741 (const_int 6) (const_int 7)])))
745 (parallel [(const_int 8) (const_int 9)
746 (const_int 10) (const_int 11)
747 (const_int 12) (const_int 13)
748 (const_int 14) (const_int 15)
749 (const_int 0) (const_int 1)
750 (const_int 2) (const_int 3)
751 (const_int 4) (const_int 5)
752 (const_int 6) (const_int 7)])))
756 (parallel [(const_int 8) (const_int 9)
757 (const_int 10) (const_int 11)
758 (const_int 12) (const_int 13)
759 (const_int 14) (const_int 15)
760 (const_int 0) (const_int 1)
761 (const_int 2) (const_int 3)
762 (const_int 4) (const_int 5)
763 (const_int 6) (const_int 7)])))]
766 ;; Little endian word swapping for 128-bit types that are either scalars or the
767 ;; special V1TI container class, which it is not appropriate to use vec_select
;; Doubleword swap of a 128-bit value.  In VSX registers this is a single
;; xxpermdi %x0,%x1,%x1,2; the GPR alternatives use paired ld/std with the
;; two halves exchanged (hence length 8 for those alternatives).
769 (define_insn "*vsx_le_permute_<mode>"
770 [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
772 (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
774 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
776 xxpermdi %x0,%x1,%x1,2
780 ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
781 std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
782 [(set_attr "length" "4,4,4,8,8,8")
783 (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
;; Two successive doubleword swaps cancel out; collapse to a plain move,
;; or delete the insn entirely when source and destination coincide after
;; reload (length 0 for that alternative).
785 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
786 [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
789 (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
792 "!BYTES_BIG_ENDIAN && TARGET_VSX"
797 [(set (match_dup 0) (match_dup 1))]
799 if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
801 emit_note (NOTE_INSN_DELETED);
805 [(set_attr "length" "0,4")
806 (set_attr "type" "veclogical")])
;; LE permuted load for KF/TF (IEEE 128-bit in vector registers): load then
;; double-swap via rs6000_emit_le_vsx_permute, using a scratch when a
;; pseudo is available.  The "r" alternative handles GPR-resident values.
808 (define_insn_and_split "*vsx_le_perm_load_<mode>"
809 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
810 (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
811 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
815 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
819 rtx tmp = (can_create_pseudo_p ()
820 ? gen_reg_rtx_and_attrs (operands[0])
822 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
823 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
827 [(set_attr "type" "vecload,load")
828 (set_attr "length" "8,8")])
;; Matching LE permuted store; operand 1 is read-write ("+") because the
;; post-reload split permutes the source in place.
830 (define_insn "*vsx_le_perm_store_<mode>"
831 [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
832 (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
833 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
837 [(set_attr "type" "vecstore,store")
838 (set_attr "length" "12,8")])
;; Pre-reload split for the store: permute into a scratch, then store.
841 [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
842 (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
843 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
846 rtx tmp = (can_create_pseudo_p ()
847 ? gen_reg_rtx_and_attrs (operands[0])
849 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
850 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
854 ;; Peepholes to catch loads and stores for TImode if TImode landed in
855 ;; GPR registers on a little endian system.
;; Rotated load followed by a rotate into a GPR collapses to a direct
;; load, provided the intermediate register is dead or identical.
857 [(set (match_operand:VSX_TI 0 "int_reg_operand")
858 (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
860 (set (match_operand:VSX_TI 2 "int_reg_operand")
861 (rotate:VSX_TI (match_dup 0)
863 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
864 && (rtx_equal_p (operands[0], operands[2])
865 || peep2_reg_dead_p (2, operands[0]))"
866 [(set (match_dup 2) (match_dup 1))])
;; Symmetric peephole for rotate-then-rotated-store into memory.
869 [(set (match_operand:VSX_TI 0 "int_reg_operand")
870 (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
872 (set (match_operand:VSX_TI 2 "memory_operand")
873 (rotate:VSX_TI (match_dup 0)
875 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
876 && peep2_reg_dead_p (2, operands[0])"
877 [(set (match_dup 2) (match_dup 1))])
879 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
880 ;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
881 ;; floating point are handled by the more generic swap elimination pass.
883 [(set (match_operand:TI 0 "vsx_register_operand" "")
884 (rotate:TI (match_operand:TI 1 "vsx_register_operand" "")
886 (set (match_operand:TI 2 "vsx_register_operand" "")
887 (rotate:TI (match_dup 0)
889 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
890 && (rtx_equal_p (operands[0], operands[2])
891 || peep2_reg_dead_p (2, operands[0]))"
892 [(set (match_dup 2) (match_dup 1))])
894 ;; The post-reload split requires that we re-permute the source
895 ;; register in case it is still live.
;; permute source in place, store, then permute it back.
897 [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
898 (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
899 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
902 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
903 rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
904 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
908 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
909 ;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
;; Splat an 8-bit signed constant across all 16 bytes; the constant is
;; masked to its low 8 bits for the assembly output.
910 (define_insn "xxspltib_v16qi"
911 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
912 (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
915 operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
916 return "xxspltib %x0,%2";
918 [(set_attr "type" "vecperm")])
;; Wider-element constants (V8HI/V4SI/V2DI) reachable with a single
;; XXSPLTIB and no follow-up extend; xxspltib_constant_p recovers the
;; byte value to splat.
920 (define_insn "xxspltib_<mode>_nosplit"
921 [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
922 (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
925 rtx op1 = operands[1];
929 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
933 operands[2] = GEN_INT (value & 0xff);
934 return "xxspltib %x0,%2";
936 [(set_attr "type" "vecperm")])
;; Constants that need XXSPLTIB plus a sign/unpack extension: splat the
;; byte into a V16QI temp, then widen with the mode-appropriate insn
;; (vextsb2d / vextsb2w / vupkhsb).
938 (define_insn_and_split "*xxspltib_<mode>_split"
939 [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
940 (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
948 rtx op0 = operands[0];
949 rtx op1 = operands[1];
950 rtx tmp = ((can_create_pseudo_p ())
951 ? gen_reg_rtx (V16QImode)
952 : gen_lowpart (V16QImode, op0));
954 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
958 emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
960 if (<MODE>mode == V2DImode)
961 emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
963 else if (<MODE>mode == V4SImode)
964 emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
966 else if (<MODE>mode == V8HImode)
967 emit_insn (gen_altivec_vupkhsb (op0, tmp));
974 [(set_attr "type" "vecperm")
975 (set_attr "length" "8")])
978 ;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTISB
979 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
980 ;; all 1's, since the machine does not have to wait for the previous
981 ;; instruction using the register being set (such as a store waiting on a slow
982 ;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.
;; General 128-bit vector move for 64-bit targets; the constraint
;; alternatives line up column-wise with the legend below and with the
;; "type"/"length" attribute lists.  Output is produced centrally by
;; rs6000_output_move_128bit.
984 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
985 ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
986 ;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
987 (define_insn "*vsx_mov<mode>_64bit"
988 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
989 "=ZwO,      <VSa>,     <VSa>,    r,         we,        ?wQ,
990 ?&r,       ??r,       ??Y,      ??r,       wo,        v,
991 ?<VSa>,    *r,        v,        ??r,       wZ,        v")
993 (match_operand:VSX_M 1 "input_operand"
994 "<VSa>,    ZwO,       <VSa>,    we,        r,         r,
995 wQ,        Y,         r,        r,         wE,        jwM,
996 ?jwM,      jwM,       W,        W,         v,         wZ"))]
998 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
999 && (register_operand (operands[0], <MODE>mode)
1000 || register_operand (operands[1], <MODE>mode))"
1002 return rs6000_output_move_128bit (operands);
1005 "vecstore,  vecload,   vecsimple, mffgpr,   mftgpr,   load,
1006 store,     load,      store,    *,         vecsimple, vecsimple,
1007 vecsimple, *,         *,        *,         vecstore,  vecload")
1012 4,         8,         20,       20,        4,         4")])
1014 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
1015 ;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1   VMX const  GPR const
1016 ;;              LVX (VMX)  STVX (VMX)
;; 32-bit variant: no GPR<->VSX direct-move alternatives, and GPR
;; loads/stores of a 128-bit value take four word operations (length 16).
1017 (define_insn "*vsx_mov<mode>_32bit"
1018 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1019 "=ZwO,      <VSa>,     <VSa>,    ??r,       ??Y,       ??r,
1020 wo,        v,         ?<VSa>,   *r,        v,         ??r,
1023 (match_operand:VSX_M 1 "input_operand"
1024 "<VSa>,    ZwO,       <VSa>,    Y,         r,         r,
1025 wE,        jwM,       ?jwM,     jwM,       W,         W,
1028 "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1029 && (register_operand (operands[0], <MODE>mode)
1030 || register_operand (operands[1], <MODE>mode))"
1032 return rs6000_output_move_128bit (operands);
1035 "vecstore,  vecload,   vecsimple, load,     store,    *,
1036 vecsimple, vecsimple, vecsimple, *,        *,         *,
1040 "4,         4,         4,         16,       16,        16,
1041 4,         4,         4,         16,       20,        32,
1044 ;; Explicit load/store expanders for the builtin functions
;; On pre-ISA-3.0 little endian, expand through explicit element swaps
;; (rs6000_emit_le_vsx_move) so the later swap-optimization pass can see
;; and eliminate them; otherwise fall through to the plain move pattern.
1045 (define_expand "vsx_load_<mode>"
1046 [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
1047 (match_operand:VSX_M 1 "memory_operand" ""))]
1048 "VECTOR_MEM_VSX_P (<MODE>mode)"
1050 /* Expand to swaps if needed, prior to swap optimization.  */
1051 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1053 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
;; Store counterpart of vsx_load_<mode>, same LE swap handling.
1058 (define_expand "vsx_store_<mode>"
1059 [(set (match_operand:VSX_M 0 "memory_operand" "")
1060 (match_operand:VSX_M 1 "vsx_register_operand" ""))]
1061 "VECTOR_MEM_VSX_P (<MODE>mode)"
1063 /* Expand to swaps if needed, prior to swap optimization.  */
1064 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1066 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1071 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1072 ;; when you really want their element-reversing behavior.
;; Each vsx_ld_elemrev_* pattern is a LE-only load whose vec_select lists
;; the elements in fully reversed order.  The V8HI/V16QI forms additionally
;; require TARGET_P9_VECTOR (byte/halfword element-reversing loads are ISA
;; 3.0 instructions).
1073 (define_insn "vsx_ld_elemrev_v2di"
1074 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1076 (match_operand:V2DI 1 "memory_operand" "Z")
1077 (parallel [(const_int 1) (const_int 0)])))]
1078 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1080 [(set_attr "type" "vecload")])
1082 (define_insn "vsx_ld_elemrev_v2df"
1083 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1085 (match_operand:V2DF 1 "memory_operand" "Z")
1086 (parallel [(const_int 1) (const_int 0)])))]
1087 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1089 [(set_attr "type" "vecload")])
1091 (define_insn "vsx_ld_elemrev_v4si"
1092 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1094 (match_operand:V4SI 1 "memory_operand" "Z")
1095 (parallel [(const_int 3) (const_int 2)
1096 (const_int 1) (const_int 0)])))]
1097 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1099 [(set_attr "type" "vecload")])
1101 (define_insn "vsx_ld_elemrev_v4sf"
1102 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1104 (match_operand:V4SF 1 "memory_operand" "Z")
1105 (parallel [(const_int 3) (const_int 2)
1106 (const_int 1) (const_int 0)])))]
1107 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1109 [(set_attr "type" "vecload")])
1111 (define_insn "vsx_ld_elemrev_v8hi"
1112 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1114 (match_operand:V8HI 1 "memory_operand" "Z")
1115 (parallel [(const_int 7) (const_int 6)
1116 (const_int 5) (const_int 4)
1117 (const_int 3) (const_int 2)
1118 (const_int 1) (const_int 0)])))]
1119 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1121 [(set_attr "type" "vecload")])
1123 (define_insn "vsx_ld_elemrev_v16qi"
1124 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1126 (match_operand:V16QI 1 "memory_operand" "Z")
1127 (parallel [(const_int 15) (const_int 14)
1128 (const_int 13) (const_int 12)
1129 (const_int 11) (const_int 10)
1130 (const_int 9) (const_int 8)
1131 (const_int 7) (const_int 6)
1132 (const_int 5) (const_int 4)
1133 (const_int 3) (const_int 2)
1134 (const_int 1) (const_int 0)])))]
1135 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1137 [(set_attr "type" "vecload")])
;; Element-reversing stores, mirroring the vsx_ld_elemrev_* patterns:
;; LE-only, with the V8HI/V16QI forms gated on TARGET_P9_VECTOR.
1139 (define_insn "vsx_st_elemrev_v2df"
1140 [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1142 (match_operand:V2DF 1 "vsx_register_operand" "wa")
1143 (parallel [(const_int 1) (const_int 0)])))]
1144 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1146 [(set_attr "type" "vecstore")])
1148 (define_insn "vsx_st_elemrev_v2di"
1149 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1151 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1152 (parallel [(const_int 1) (const_int 0)])))]
1153 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1155 [(set_attr "type" "vecstore")])
1157 (define_insn "vsx_st_elemrev_v4sf"
1158 [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1160 (match_operand:V4SF 1 "vsx_register_operand" "wa")
1161 (parallel [(const_int 3) (const_int 2)
1162 (const_int 1) (const_int 0)])))]
1163 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1165 [(set_attr "type" "vecstore")])
1167 (define_insn "vsx_st_elemrev_v4si"
1168 [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1170 (match_operand:V4SI 1 "vsx_register_operand" "wa")
1171 (parallel [(const_int 3) (const_int 2)
1172 (const_int 1) (const_int 0)])))]
1173 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1175 [(set_attr "type" "vecstore")])
1177 (define_insn "vsx_st_elemrev_v8hi"
1178 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1180 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1181 (parallel [(const_int 7) (const_int 6)
1182 (const_int 5) (const_int 4)
1183 (const_int 3) (const_int 2)
1184 (const_int 1) (const_int 0)])))]
1185 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1187 [(set_attr "type" "vecstore")])
1189 (define_insn "vsx_st_elemrev_v16qi"
1190 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1192 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1193 (parallel [(const_int 15) (const_int 14)
1194 (const_int 13) (const_int 12)
1195 (const_int 11) (const_int 10)
1196 (const_int 9) (const_int 8)
1197 (const_int 7) (const_int 6)
1198 (const_int 5) (const_int 4)
1199 (const_int 3) (const_int 2)
1200 (const_int 1) (const_int 0)])))]
1201 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1203 [(set_attr "type" "vecstore")])
1206 ;; VSX vector floating point arithmetic instructions.  The VSX scalar
1207 ;; instructions are now combined with the insn for the traditional floating
;; Vector FP add over V4SF/V2DF; first constraint alternative is the
;; preferred class <VSr>, the "?"-marked second is any VSX register <VSa>.
1209 (define_insn "*vsx_add<mode>3"
1210 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1211 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1212 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1213 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1214 "xvadd<VSs> %x0,%x1,%x2"
1215 [(set_attr "type" "<VStype_simple>")
1216 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector FP subtract (xvsubsp/xvsubdp).
1218 (define_insn "*vsx_sub<mode>3"
1219 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1220 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1221 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1222 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1223 "xvsub<VSs> %x0,%x1,%x2"
1224 [(set_attr "type" "<VStype_simple>")
1225 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector FP multiply (xvmulsp/xvmuldp).
1227 (define_insn "*vsx_mul<mode>3"
1228 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1229 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1230 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1231 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1232 "xvmul<VSs> %x0,%x1,%x2"
1233 [(set_attr "type" "<VStype_simple>")
1234 (set_attr "fp_type" "<VSfptype_mul>")])
1236 ; Emulate vector with scalar for vec_mul in V2DImode
; No V2DI multiply instruction here: split (before reload, so DImode
; pseudos can be created) into two extract/muldi3 pairs per element and
; reassemble with vsx_concat_v2di.  Element 0's product is computed into
; a separate temp (op5) so op3/op4 can be reused for element 1.
1237 (define_insn_and_split "vsx_mul_v2di"
1238 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1239 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1240 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1242 "VECTOR_MEM_VSX_P (V2DImode)"
1244 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1248 rtx op0 = operands[0];
1249 rtx op1 = operands[1];
1250 rtx op2 = operands[2];
1251 rtx op3 = gen_reg_rtx (DImode);
1252 rtx op4 = gen_reg_rtx (DImode);
1253 rtx op5 = gen_reg_rtx (DImode);
1254 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1255 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1256 emit_insn (gen_muldi3 (op5, op3, op4));
1257 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1258 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1259 emit_insn (gen_muldi3 (op3, op3, op4));
1260 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1263 [(set_attr "type" "mul")])
;; Vector FP divide: emits xvdivsp/xvdivdp; uses the (slower) divide type and
;; fp_type attributes rather than the simple ones.
1265 (define_insn "*vsx_div<mode>3"
1266 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1267 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1268 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1269 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1270 "xvdiv<VSs> %x0,%x1,%x2"
1271 [(set_attr "type" "<VStype_div>")
1272 (set_attr "fp_type" "<VSfptype_div>")])

1274 ; Emulate vector with scalar for vec_div in V2DImode
;; Same element-by-element strategy as vsx_mul_v2di, but using the signed
;; scalar divide (divdi3) on each doubleword lane.
1275 (define_insn_and_split "vsx_div_v2di"
1276 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1277 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1278 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1280 "VECTOR_MEM_VSX_P (V2DImode)"
1282 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1286 rtx op0 = operands[0];
1287 rtx op1 = operands[1];
1288 rtx op2 = operands[2];
1289 rtx op3 = gen_reg_rtx (DImode);
1290 rtx op4 = gen_reg_rtx (DImode);
1291 rtx op5 = gen_reg_rtx (DImode);
;; op5 = op1[0] / op2[0]; then op3 = op1[1] / op2[1]; concat into op0.
1292 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1293 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1294 emit_insn (gen_divdi3 (op5, op3, op4));
1295 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1296 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1297 emit_insn (gen_divdi3 (op3, op3, op4));
1298 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1301 [(set_attr "type" "div")])
;; Unsigned counterpart of vsx_div_v2di: identical element-wise split but
;; calls the unsigned scalar divide (udivdi3) on each doubleword lane.
1303 (define_insn_and_split "vsx_udiv_v2di"
1304 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1305 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1306 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1308 "VECTOR_MEM_VSX_P (V2DImode)"
1310 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1314 rtx op0 = operands[0];
1315 rtx op1 = operands[1];
1316 rtx op2 = operands[2];
1317 rtx op3 = gen_reg_rtx (DImode);
1318 rtx op4 = gen_reg_rtx (DImode);
1319 rtx op5 = gen_reg_rtx (DImode);
;; op5 = op1[0] u/ op2[0]; op3 = op1[1] u/ op2[1]; concat into op0.
1320 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1321 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1322 emit_insn (gen_udivdi3 (op5, op3, op4));
1323 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1324 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1325 emit_insn (gen_udivdi3 (op3, op3, op4));
1326 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1329 [(set_attr "type" "div")])
1331 ;; *tdiv* instruction returning the FG flag
;; Expands a test-for-software-divide: the insn below sets a CCFP pseudo
;; (operands[3], created here), and the GPR result is 1 iff that CC compares
;; greater-than (the FG bit of the CR field).
1332 (define_expand "vsx_tdiv<mode>3_fg"
1334 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1335 (match_operand:VSX_B 2 "vsx_register_operand" "")]
1337 (set (match_operand:SI 0 "gpc_reg_operand" "")
1338 (gt:SI (match_dup 3)
1340 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1342 operands[3] = gen_reg_rtx (CCFPmode);

1345 ;; *tdiv* instruction returning the FE flag
;; Same as the _fg expander but extracts the FE bit via an eq comparison
;; against the CCFP pseudo.
1346 (define_expand "vsx_tdiv<mode>3_fe"
1348 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1349 (match_operand:VSX_B 2 "vsx_register_operand" "")]
1351 (set (match_operand:SI 0 "gpc_reg_operand" "")
1352 (eq:SI (match_dup 3)
1354 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1356 operands[3] = gen_reg_rtx (CCFPmode);

;; The actual instruction: xstdivdp/xvtdivsp/xvtdivdp (<VSv> picks s/v),
;; writing a CR field (CCFP) from the two FP inputs.
1359 (define_insn "*vsx_tdiv<mode>3_internal"
1360 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1361 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1362 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1364 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1365 "x<VSv>tdiv<VSs> %0,%x1,%x2"
1366 [(set_attr "type" "<VStype_simple>")
1367 (set_attr "fp_type" "<VSfptype_simple>")])
;; FP reciprocal estimate (the insn template line is not visible here; the
;; pattern is an unspec over one FP input).
1369 (define_insn "vsx_fre<mode>2"
1370 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1371 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1373 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1375 [(set_attr "type" "<VStype_simple>")
1376 (set_attr "fp_type" "<VSfptype_simple>")])

;; Vector FP negate: xvnegsp/xvnegdp.
1378 (define_insn "*vsx_neg<mode>2"
1379 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1380 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1381 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1382 "xvneg<VSs> %x0,%x1"
1383 [(set_attr "type" "<VStype_simple>")
1384 (set_attr "fp_type" "<VSfptype_simple>")])

;; Vector FP absolute value: xvabssp/xvabsdp.
1386 (define_insn "*vsx_abs<mode>2"
1387 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1388 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1389 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1390 "xvabs<VSs> %x0,%x1"
1391 [(set_attr "type" "<VStype_simple>")
1392 (set_attr "fp_type" "<VSfptype_simple>")])

;; Vector FP negative-absolute-value: xvnabssp/xvnabsdp (neg of abs; the
;; intervening rtl lines are not visible in this extract).
1394 (define_insn "vsx_nabs<mode>2"
1395 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1398 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1399 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1400 "xvnabs<VSs> %x0,%x1"
1401 [(set_attr "type" "<VStype_simple>")
1402 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector FP signed maximum: xvmaxsp/xvmaxdp.
1404 (define_insn "vsx_smax<mode>3"
1405 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1406 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1407 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1408 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1409 "xvmax<VSs> %x0,%x1,%x2"
1410 [(set_attr "type" "<VStype_simple>")
1411 (set_attr "fp_type" "<VSfptype_simple>")])

;; Vector FP signed minimum: xvminsp/xvmindp.
1413 (define_insn "*vsx_smin<mode>3"
1414 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1415 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1416 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1417 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1418 "xvmin<VSs> %x0,%x1,%x2"
1419 [(set_attr "type" "<VStype_simple>")
1420 (set_attr "fp_type" "<VSfptype_simple>")])

;; Vector FP square root: xvsqrtsp/xvsqrtdp (uses the sqrt scheduling type).
1422 (define_insn "*vsx_sqrt<mode>2"
1423 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1424 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1425 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1426 "xvsqrt<VSs> %x0,%x1"
1427 [(set_attr "type" "<VStype_sqrt>")
1428 (set_attr "fp_type" "<VSfptype_sqrt>")])

;; Vector FP reciprocal-square-root estimate: xvrsqrtesp/xvrsqrtedp.
1430 (define_insn "*vsx_rsqrte<mode>2"
1431 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1432 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1434 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1435 "xvrsqrte<VSs> %x0,%x1"
1436 [(set_attr "type" "<VStype_simple>")
1437 (set_attr "fp_type" "<VSfptype_simple>")])
1439 ;; *tsqrt* returning the fg flag
;; As with the tdiv expanders above: generate the test instruction into a
;; fresh CCFP pseudo (operands[2]) and read the FG bit with gt.
1440 (define_expand "vsx_tsqrt<mode>2_fg"
1442 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1444 (set (match_operand:SI 0 "gpc_reg_operand" "")
1445 (gt:SI (match_dup 2)
1447 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1449 operands[2] = gen_reg_rtx (CCFPmode);

1452 ;; *tsqrt* returning the fe flag
;; Same, but the FE bit is read with eq.
1453 (define_expand "vsx_tsqrt<mode>2_fe"
1455 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1457 (set (match_operand:SI 0 "gpc_reg_operand" "")
1458 (eq:SI (match_dup 2)
1460 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1462 operands[2] = gen_reg_rtx (CCFPmode);

;; The test-square-root instruction itself: xstsqrtdp/xvtsqrtsp/xvtsqrtdp,
;; writing a CR field from one FP input.
1465 (define_insn "*vsx_tsqrt<mode>2_internal"
1466 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1467 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1469 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1470 "x<VSv>tsqrt<VSs> %0,%x1"
1471 [(set_attr "type" "<VStype_simple>")
1472 (set_attr "fp_type" "<VSfptype_simple>")])
1474 ;; Fused vector multiply/add instructions. Support the classical Altivec
1475 ;; versions of fma, which allows the target to be a separate register from the
1476 ;; 3 inputs. Under VSX, the target must be either the addend or the first
;; V4SF fma: alternatives 1-4 tie the target to the addend (xvmaddmsp, %x3)
;; or the multiplicand (xvmaddasp, %x2); the final "v" alternative is the
;; Altivec vmaddfp with an independent target.  Operand 1 is commutative (%).
1479 (define_insn "*vsx_fmav4sf4"
1480 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1482 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1483 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1484 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1485 "VECTOR_UNIT_VSX_P (V4SFmode)"
1487 xvmaddasp %x0,%x1,%x2
1488 xvmaddmsp %x0,%x1,%x3
1489 xvmaddasp %x0,%x1,%x2
1490 xvmaddmsp %x0,%x1,%x3
1491 vmaddfp %0,%1,%2,%3"
1492 [(set_attr "type" "vecfloat")])

;; V2DF fma: same a-form/m-form pairing; no Altivec fallback exists for
;; double precision.
1494 (define_insn "*vsx_fmav2df4"
1495 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1497 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1498 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1499 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1500 "VECTOR_UNIT_VSX_P (V2DFmode)"
1502 xvmaddadp %x0,%x1,%x2
1503 xvmaddmdp %x0,%x1,%x3
1504 xvmaddadp %x0,%x1,%x2
1505 xvmaddmdp %x0,%x1,%x3"
1506 [(set_attr "type" "vecdouble")])
;; Fused multiply-subtract: xvmsuba/xvmsubm forms, target tied to addend or
;; multiplicand per alternative as in the fma patterns above.
1508 (define_insn "*vsx_fms<mode>4"
1509 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1511 (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1512 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1514 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1515 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1517 xvmsuba<VSs> %x0,%x1,%x2
1518 xvmsubm<VSs> %x0,%x1,%x3
1519 xvmsuba<VSs> %x0,%x1,%x2
1520 xvmsubm<VSs> %x0,%x1,%x3"
1521 [(set_attr "type" "<VStype_mul>")])

;; Negated fused multiply-add: xvnmadda/xvnmaddm forms.  Note operand 1 has
;; no commutative marker here, unlike the fms pattern above.
1523 (define_insn "*vsx_nfma<mode>4"
1524 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1527 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1528 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1529 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1530 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1532 xvnmadda<VSs> %x0,%x1,%x2
1533 xvnmaddm<VSs> %x0,%x1,%x3
1534 xvnmadda<VSs> %x0,%x1,%x2
1535 xvnmaddm<VSs> %x0,%x1,%x3"
1536 [(set_attr "type" "<VStype_mul>")
1537 (set_attr "fp_type" "<VSfptype_mul>")])
;; Negated fused multiply-subtract, V4SF: xvnmsubasp/xvnmsubmsp, plus the
;; Altivec vnmsubfp alternative with an independent target register.
1539 (define_insn "*vsx_nfmsv4sf4"
1540 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1543 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1544 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1546 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1547 "VECTOR_UNIT_VSX_P (V4SFmode)"
1549 xvnmsubasp %x0,%x1,%x2
1550 xvnmsubmsp %x0,%x1,%x3
1551 xvnmsubasp %x0,%x1,%x2
1552 xvnmsubmsp %x0,%x1,%x3
1553 vnmsubfp %0,%1,%2,%3"
1554 [(set_attr "type" "vecfloat")])

;; Negated fused multiply-subtract, V2DF: xvnmsubadp/xvnmsubmdp only.
1556 (define_insn "*vsx_nfmsv2df4"
1557 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1560 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1561 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1563 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
1564 "VECTOR_UNIT_VSX_P (V2DFmode)"
1566 xvnmsubadp %x0,%x1,%x2
1567 xvnmsubmdp %x0,%x1,%x3
1568 xvnmsubadp %x0,%x1,%x2
1569 xvnmsubmdp %x0,%x1,%x3"
1570 [(set_attr "type" "vecdouble")])
1572 ;; Vector conditional expressions (no scalar version for these instructions)
;; Element-wise equality compare producing an all-ones/all-zeros mask:
;; xvcmpeqsp/xvcmpeqdp.
1573 (define_insn "vsx_eq<mode>"
1574 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1575 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1576 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1577 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1578 "xvcmpeq<VSs> %x0,%x1,%x2"
1579 [(set_attr "type" "<VStype_simple>")
1580 (set_attr "fp_type" "<VSfptype_simple>")])

;; Element-wise greater-than compare: xvcmpgtsp/xvcmpgtdp.
1582 (define_insn "vsx_gt<mode>"
1583 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1584 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1585 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1586 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1587 "xvcmpgt<VSs> %x0,%x1,%x2"
1588 [(set_attr "type" "<VStype_simple>")
1589 (set_attr "fp_type" "<VSfptype_simple>")])

;; Element-wise greater-or-equal compare: xvcmpgesp/xvcmpgedp.
1591 (define_insn "*vsx_ge<mode>"
1592 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1593 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1594 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1595 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1596 "xvcmpge<VSs> %x0,%x1,%x2"
1597 [(set_attr "type" "<VStype_simple>")
1598 (set_attr "fp_type" "<VSfptype_simple>")])
1600 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
1601 ;; indicate a combined status
;; Dot-form equality compare (xvcmpeq<sp/dp>.): writes the mask to operand 0
;; and the combined all-true/all-false status to CR6.
1602 (define_insn "*vsx_eq_<mode>_p"
1603 [(set (reg:CC CR6_REGNO)
1605 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1606 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1608 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1609 (eq:VSX_F (match_dup 1)
1611 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1612 "xvcmpeq<VSs>. %x0,%x1,%x2"
1613 [(set_attr "type" "<VStype_simple>")])

;; Dot-form greater-than compare, same CR6 side effect.
1615 (define_insn "*vsx_gt_<mode>_p"
1616 [(set (reg:CC CR6_REGNO)
1618 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1619 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1621 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1622 (gt:VSX_F (match_dup 1)
1624 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1625 "xvcmpgt<VSs>. %x0,%x1,%x2"
1626 [(set_attr "type" "<VStype_simple>")])

;; Dot-form greater-or-equal compare, same CR6 side effect.
1628 (define_insn "*vsx_ge_<mode>_p"
1629 [(set (reg:CC CR6_REGNO)
1631 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1632 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1634 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1635 (ge:VSX_F (match_dup 1)
1637 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1638 "xvcmpge<VSs>. %x0,%x1,%x2"
1639 [(set_attr "type" "<VStype_simple>")])
;; Bitwise select: for each bit, pick operand 2 where the mask (operand 1,
;; compared non-equal to zero) is set, else operand 3.  Note the xxsel
;; operand order reverses 2 and 3 relative to the rtl (%x3,%x2).
1642 (define_insn "*vsx_xxsel<mode>"
1643 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1645 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1646 (match_operand:VSX_L 4 "zero_constant" ""))
1647 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1648 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1649 "VECTOR_MEM_VSX_P (<MODE>mode)"
1650 "xxsel %x0,%x3,%x2,%x1"
1651 [(set_attr "type" "vecmove")])

;; Unsigned-compare variant of the select: identical emitted instruction,
;; only the rtl comparison mode (CCUNS) differs.
1653 (define_insn "*vsx_xxsel<mode>_uns"
1654 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1656 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1657 (match_operand:VSX_L 4 "zero_constant" ""))
1658 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1659 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1660 "VECTOR_MEM_VSX_P (<MODE>mode)"
1661 "xxsel %x0,%x3,%x2,%x1"
1662 [(set_attr "type" "vecmove")])
;; FP copy-sign: xvcpsgnsp/xvcpsgndp.  The instruction takes the sign from
;; %x2 (operand 2) and the magnitude from %x1 — note the swapped operand
;; order in the output template.
1665 (define_insn "vsx_copysign<mode>3"
1666 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1668 [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1669 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
1671 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1672 "xvcpsgn<VSs> %x0,%x2,%x1"
1673 [(set_attr "type" "<VStype_simple>")
1674 (set_attr "fp_type" "<VSfptype_simple>")])
1676 ;; For the conversions, limit the register class for the integer value to be
1677 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
1678 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
1679 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
1680 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
1681 ;; in allowing virtual registers.
;; Signed int -> FP convert: xvcvsx{w,d}{sp,dp}.
1682 (define_insn "vsx_float<VSi><mode>2"
1683 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1684 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1685 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1686 "xvcvsx<VSc><VSs> %x0,%x1"
1687 [(set_attr "type" "<VStype_simple>")
1688 (set_attr "fp_type" "<VSfptype_simple>")])

;; Unsigned int -> FP convert: xvcvux{w,d}{sp,dp}.
1690 (define_insn "vsx_floatuns<VSi><mode>2"
1691 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1692 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1693 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1694 "xvcvux<VSc><VSs> %x0,%x1"
1695 [(set_attr "type" "<VStype_simple>")
1696 (set_attr "fp_type" "<VSfptype_simple>")])

;; FP -> signed int truncating convert (round toward zero).
1698 (define_insn "vsx_fix_trunc<mode><VSi>2"
1699 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1700 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1701 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1702 "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
1703 [(set_attr "type" "<VStype_simple>")
1704 (set_attr "fp_type" "<VSfptype_simple>")])

;; FP -> unsigned int truncating convert.
1706 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
1707 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1708 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1709 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1710 "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
1711 [(set_attr "type" "<VStype_simple>")
1712 (set_attr "fp_type" "<VSfptype_simple>")])
1714 ;; Math rounding functions
;; Round to nearest integral value: xsrdpi / xvrspi / xvrdpi.
1715 (define_insn "vsx_x<VSv>r<VSs>i"
1716 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1717 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1718 UNSPEC_VSX_ROUND_I))]
1719 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1720 "x<VSv>r<VSs>i %x0,%x1"
1721 [(set_attr "type" "<VStype_simple>")
1722 (set_attr "fp_type" "<VSfptype_simple>")])

;; Round to integral using current rounding mode: xsrdpic / xvrspic / xvrdpic.
1724 (define_insn "vsx_x<VSv>r<VSs>ic"
1725 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1726 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1727 UNSPEC_VSX_ROUND_IC))]
1728 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1729 "x<VSv>r<VSs>ic %x0,%x1"
1730 [(set_attr "type" "<VStype_simple>")
1731 (set_attr "fp_type" "<VSfptype_simple>")])

;; Truncate toward zero (btrunc), vector-only form: xvrspiz/xvrdpiz.
1733 (define_insn "vsx_btrunc<mode>2"
1734 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1735 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1736 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1737 "xvr<VSs>iz %x0,%x1"
1738 [(set_attr "type" "<VStype_simple>")
1739 (set_attr "fp_type" "<VSfptype_simple>")])

;; Truncate toward zero, scalar-or-vector unspec form: xsrdpiz/xvr<VSs>iz.
1741 (define_insn "*vsx_b2trunc<mode>2"
1742 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1743 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1745 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1746 "x<VSv>r<VSs>iz %x0,%x1"
1747 [(set_attr "type" "<VStype_simple>")
1748 (set_attr "fp_type" "<VSfptype_simple>")])

;; Round toward minus infinity (floor): xvrspim/xvrdpim.
1750 (define_insn "vsx_floor<mode>2"
1751 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1752 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1754 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1755 "xvr<VSs>im %x0,%x1"
1756 [(set_attr "type" "<VStype_simple>")
1757 (set_attr "fp_type" "<VSfptype_simple>")])

;; Round toward plus infinity (ceil): xvrspip/xvrdpip.
1759 (define_insn "vsx_ceil<mode>2"
1760 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1761 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1763 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1764 "xvr<VSs>ip %x0,%x1"
1765 [(set_attr "type" "<VStype_simple>")
1766 (set_attr "fp_type" "<VSfptype_simple>")])
1769 ;; VSX convert to/from double vector
1771 ;; Convert between single and double precision
1772 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
1773 ;; scalar single precision instructions internally use the double format.
1774 ;; Prefer the altivec registers, since we likely will need to do a vperm
;; Generic single<->double conversion: the insn mnemonic, result mode, and
;; scheduling type are all selected by the VSX_SPDP mode attributes.
1775 (define_insn "vsx_<VS_spdp_insn>"
1776 [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
1777 (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
1778 UNSPEC_VSX_CVSPDP))]
1779 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1780 "<VS_spdp_insn> %x0,%x1"
1781 [(set_attr "type" "<VS_spdp_type>")])

1783 ;; xscvspdp, represent the scalar SF type as V4SF
1784 (define_insn "vsx_xscvspdp"
1785 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1786 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1787 UNSPEC_VSX_CVSPDP))]
1788 "VECTOR_UNIT_VSX_P (V4SFmode)"
1790 [(set_attr "type" "fp")])

1792 ;; Same as vsx_xscvspdp, but use SF as the type
1793 (define_insn "vsx_xscvspdp_scalar2"
1794 [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
1795 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1796 UNSPEC_VSX_CVSPDP))]
1797 "VECTOR_UNIT_VSX_P (V4SFmode)"
1799 [(set_attr "type" "fp")])
1801 ;; Generate xvcvhpsp instruction
;; Half-precision (stored in a V16QI) to single-precision vector convert.
1802 (define_insn "vsx_xvcvhpsp"
1803 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1804 (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")]
1805 UNSPEC_VSX_CVHPSP))]
1808 [(set_attr "type" "vecfloat")])

1810 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
1811 ;; format of scalars is actually DF.
1812 (define_insn "vsx_xscvdpsp_scalar"
1813 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1814 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
1815 UNSPEC_VSX_CVSPDP))]
1816 "VECTOR_UNIT_VSX_P (V4SFmode)"
1818 [(set_attr "type" "fp")])

1820 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
1821 (define_insn "vsx_xscvdpspn"
1822 [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
1823 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
1824 UNSPEC_VSX_CVDPSPN))]
1827 [(set_attr "type" "fp")])

;; Non-signalling single->double scalar convert (DF result from V4SF input).
1829 (define_insn "vsx_xscvspdpn"
1830 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1831 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1832 UNSPEC_VSX_CVSPDPN))]
1835 [(set_attr "type" "fp")])

;; Non-signalling variant taking the scalar SF directly as input.
1837 (define_insn "vsx_xscvdpspn_scalar"
1838 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1839 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
1840 UNSPEC_VSX_CVDPSPN))]
1843 [(set_attr "type" "fp")])

1845 ;; Used by direct move to move a SFmode value from GPR to VSX register
1846 (define_insn "vsx_xscvspdpn_directmove"
1847 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
1848 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
1849 UNSPEC_VSX_CVSPDPN))]
1852 [(set_attr "type" "fp")])
1854 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
;; Signed V2DI -> V2DF with a power-of-two scale: convert first, then divide
;; the result by 2**scale via rs6000_scale_v2df (hence the negated scale).
1856 (define_expand "vsx_xvcvsxddp_scale"
1857 [(match_operand:V2DF 0 "vsx_register_operand" "")
1858 (match_operand:V2DI 1 "vsx_register_operand" "")
1859 (match_operand:QI 2 "immediate_operand" "")]
1860 "VECTOR_UNIT_VSX_P (V2DFmode)"
1862 rtx op0 = operands[0];
1863 rtx op1 = operands[1];
1864 int scale = INTVAL(operands[2]);
1865 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
1867 rs6000_scale_v2df (op0, op0, -scale);

;; Signed doubleword vector to double-precision vector convert.
1871 (define_insn "vsx_xvcvsxddp"
1872 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1873 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1874 UNSPEC_VSX_XVCVSXDDP))]
1875 "VECTOR_UNIT_VSX_P (V2DFmode)"
1877 [(set_attr "type" "vecdouble")])

;; Unsigned V2DI -> V2DF with scale; mirrors the signed expander above.
1879 (define_expand "vsx_xvcvuxddp_scale"
1880 [(match_operand:V2DF 0 "vsx_register_operand" "")
1881 (match_operand:V2DI 1 "vsx_register_operand" "")
1882 (match_operand:QI 2 "immediate_operand" "")]
1883 "VECTOR_UNIT_VSX_P (V2DFmode)"
1885 rtx op0 = operands[0];
1886 rtx op1 = operands[1];
1887 int scale = INTVAL(operands[2]);
1888 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
1890 rs6000_scale_v2df (op0, op0, -scale);

;; Unsigned doubleword vector to double-precision vector convert.
1894 (define_insn "vsx_xvcvuxddp"
1895 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1896 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1897 UNSPEC_VSX_XVCVUXDDP))]
1898 "VECTOR_UNIT_VSX_P (V2DFmode)"
1900 [(set_attr "type" "vecdouble")])
;; V2DF -> signed V2DI with a power-of-two scale: multiply the input by
;; 2**scale into a temp (rs6000_scale_v2df), then convert.  The declaration
;; of `tmp` is on a line not visible in this extract.
1902 (define_expand "vsx_xvcvdpsxds_scale"
1903 [(match_operand:V2DI 0 "vsx_register_operand" "")
1904 (match_operand:V2DF 1 "vsx_register_operand" "")
1905 (match_operand:QI 2 "immediate_operand" "")]
1906 "VECTOR_UNIT_VSX_P (V2DFmode)"
1908 rtx op0 = operands[0];
1909 rtx op1 = operands[1];
1911 int scale = INTVAL (operands[2]);
1916 tmp = gen_reg_rtx (V2DFmode);
1917 rs6000_scale_v2df (tmp, op1, scale);
1919 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));

1923 ;; convert vector of 64-bit floating point numbers to vector of
1924 ;; 64-bit signed integer
1925 (define_insn "vsx_xvcvdpsxds"
1926 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1927 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1928 UNSPEC_VSX_XVCVDPSXDS))]
1929 "VECTOR_UNIT_VSX_P (V2DFmode)"
1930 "xvcvdpsxds %x0,%x1"
1931 [(set_attr "type" "vecdouble")])

1933 ;; convert vector of 32-bit floating point numbers to vector of
1934 ;; 32-bit signed integer
1935 (define_insn "vsx_xvcvspsxws"
1936 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1937 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1938 UNSPEC_VSX_XVCVSPSXWS))]
1939 "VECTOR_UNIT_VSX_P (V4SFmode)"
1940 "xvcvspsxws %x0,%x1"
1941 [(set_attr "type" "vecfloat")])
1943 ;; convert vector of 64-bit floating point numbers to vector of
1944 ;; 64-bit unsigned integer
;; Unsigned counterpart of vsx_xvcvdpsxds_scale: scale by 2**scale, then
;; convert with the unsigned xvcvdpuxds pattern.
1945 (define_expand "vsx_xvcvdpuxds_scale"
1946 [(match_operand:V2DI 0 "vsx_register_operand" "")
1947 (match_operand:V2DF 1 "vsx_register_operand" "")
1948 (match_operand:QI 2 "immediate_operand" "")]
1949 "VECTOR_UNIT_VSX_P (V2DFmode)"
1951 rtx op0 = operands[0];
1952 rtx op1 = operands[1];
1954 int scale = INTVAL (operands[2]);
1959 tmp = gen_reg_rtx (V2DFmode);
1960 rs6000_scale_v2df (tmp, op1, scale);
1962 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));

1966 ;; convert vector of 32-bit floating point numbers to vector of
1967 ;; 32-bit unsigned integer
;; NOTE(review): this *unsigned* convert (xvcvspuxws) reuses the *signed*
;; unspec UNSPEC_VSX_XVCVSPSXWS from vsx_xvcvspsxws above, so the RTL for the
;; two conversions is indistinguishable and CSE could wrongly merge a signed
;; and an unsigned convert of the same input.  It likely needs its own
;; UNSPEC_VSX_XVCVSPUXWS — confirm against the unspec enum before changing.
1968 (define_insn "vsx_xvcvspuxws"
1969 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1970 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1971 UNSPEC_VSX_XVCVSPSXWS))]
1972 "VECTOR_UNIT_VSX_P (V4SFmode)"
1973 "xvcvspuxws %x0,%x1"
1974 [(set_attr "type" "vecfloat")])
;; V2DF -> unsigned V2DI convert (used by the _scale expander above).
1976 (define_insn "vsx_xvcvdpuxds"
1977 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1978 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1979 UNSPEC_VSX_XVCVDPUXDS))]
1980 "VECTOR_UNIT_VSX_P (V2DFmode)"
1981 "xvcvdpuxds %x0,%x1"
1982 [(set_attr "type" "vecdouble")])

1984 ;; Convert from 64-bit to 32-bit types
1985 ;; Note, favor the Altivec registers since the usual use of these instructions
1986 ;; is in vector converts and we need to use the Altivec vperm instruction.
;; V2DF -> signed V4SI (narrowing).
1988 (define_insn "vsx_xvcvdpsxws"
1989 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1990 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1991 UNSPEC_VSX_CVDPSXWS))]
1992 "VECTOR_UNIT_VSX_P (V2DFmode)"
1993 "xvcvdpsxws %x0,%x1"
1994 [(set_attr "type" "vecdouble")])

;; V2DF -> unsigned V4SI (narrowing); distinct unspec from the signed form.
1996 (define_insn "vsx_xvcvdpuxws"
1997 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1998 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1999 UNSPEC_VSX_CVDPUXWS))]
2000 "VECTOR_UNIT_VSX_P (V2DFmode)"
2001 "xvcvdpuxws %x0,%x1"
2002 [(set_attr "type" "vecdouble")])

;; Signed V2DI -> V4SF (narrowing); insn template line not visible here.
2004 (define_insn "vsx_xvcvsxdsp"
2005 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2006 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2007 UNSPEC_VSX_CVSXDSP))]
2008 "VECTOR_UNIT_VSX_P (V2DFmode)"
2010 [(set_attr "type" "vecfloat")])

;; Unsigned V2DI -> V4SF (narrowing).
2012 (define_insn "vsx_xvcvuxdsp"
2013 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2014 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2015 UNSPEC_VSX_CVUXDSP))]
2016 "VECTOR_UNIT_VSX_P (V2DFmode)"
2018 [(set_attr "type" "vecdouble")])
2020 ;; Convert from 32-bit to 64-bit types
2021 ;; Provide both vector and scalar targets
;; Signed V4SI -> V2DF (widening).
2022 (define_insn "vsx_xvcvsxwdp"
2023 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2024 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2025 UNSPEC_VSX_CVSXWDP))]
2026 "VECTOR_UNIT_VSX_P (V2DFmode)"
2028 [(set_attr "type" "vecdouble")])

;; Scalar-target variant: DF result from a signed V4SI element.
2030 (define_insn "vsx_xvcvsxwdp_df"
2031 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2032 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2033 UNSPEC_VSX_CVSXWDP))]
2036 [(set_attr "type" "vecdouble")])

;; Unsigned V4SI -> V2DF (widening).
2038 (define_insn "vsx_xvcvuxwdp"
2039 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2040 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2041 UNSPEC_VSX_CVUXWDP))]
2042 "VECTOR_UNIT_VSX_P (V2DFmode)"
2044 [(set_attr "type" "vecdouble")])

;; Scalar-target variant: DF result from an unsigned V4SI element.
2046 (define_insn "vsx_xvcvuxwdp_df"
2047 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2048 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2049 UNSPEC_VSX_CVUXWDP))]
2052 [(set_attr "type" "vecdouble")])

;; V4SF -> signed V2DI (widening).
2054 (define_insn "vsx_xvcvspsxds"
2055 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2056 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2057 UNSPEC_VSX_CVSPSXDS))]
2058 "VECTOR_UNIT_VSX_P (V2DFmode)"
2059 "xvcvspsxds %x0,%x1"
2060 [(set_attr "type" "vecdouble")])

;; V4SF -> unsigned V2DI (widening).
2062 (define_insn "vsx_xvcvspuxds"
2063 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2064 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2065 UNSPEC_VSX_CVSPUXDS))]
2066 "VECTOR_UNIT_VSX_P (V2DFmode)"
2067 "xvcvspuxds %x0,%x1"
2068 [(set_attr "type" "vecdouble")])
;; Signed V4SI -> V4SF convert; insn template line not visible here.
2070 (define_insn "vsx_xvcvsxwsp"
2071 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2072 (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2073 UNSPEC_VSX_CVSXWSP))]
2074 "VECTOR_UNIT_VSX_P (V4SFmode)"
2076 [(set_attr "type" "vecfloat")])

;; Unsigned V4SI -> V4SF convert.
2078 (define_insn "vsx_xvcvuxwsp"
2079 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2080 (unspec:V4SF[(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2081 UNSPEC_VSX_CVUXWSP))]
2082 "VECTOR_UNIT_VSX_P (V4SFmode)"
2084 [(set_attr "type" "vecfloat")])
2087 ;; convert two long long signed ints to float
;; Expander only: all code generation is delegated to
;; rs6000_generate_float2_code; `true` selects the signed conversion.
2088 (define_expand "float2_v2di"
2089 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2090 (use (match_operand:V2DI 1 "register_operand" "wa"))
2091 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2092 "VECTOR_UNIT_VSX_P (V4SFmode)"
2094 rtx rtx_src1, rtx_src2, rtx_dst;
2096 rtx_dst = operands[0];
2097 rtx_src1 = operands[1];
2098 rtx_src2 = operands[2];
2100 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2104 ;; Generate uns_float2
2105 ;; convert two long long unsigned ints to float
;; Expander only: code generation is delegated to rs6000_generate_float2_code.
;; FIX: this unsigned variant previously passed `true` (the signed-convert
;; flag, as named by vsigned2_v2df's `signed_convert` and as passed by the
;; signed float2_v2di above), which made uns_float2 emit signed conversions.
;; Pass `false` so the unsigned conversion path is used, matching the
;; unsigned single-operand expanders (unsfloatev2di/unsfloatov2di) that call
;; gen_vsx_xvcvuxdsp.
2106 (define_expand "uns_float2_v2di"
2107 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2108 (use (match_operand:V2DI 1 "register_operand" "wa"))
2109 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2110 "VECTOR_UNIT_VSX_P (V4SFmode)"
2112 rtx rtx_src1, rtx_src2, rtx_dst;
2114 rtx_dst = operands[0];
2115 rtx_src1 = operands[1];
2116 rtx_src2 = operands[2];
2118 rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
2123 ;; convert double or long long signed to float
2124 ;; (Only even words are valid, BE numbering)
;; On BE the converted even words are rotated into place with vsldoi (4-byte
;; shift); on LE the raw xvcv<VF_sxddp>sp result already has them in place.
2125 (define_expand "floate<mode>"
2126 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2127 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2128 "VECTOR_UNIT_VSX_P (V4SFmode)"
2130 if (VECTOR_ELT_ORDER_BIG)
2132 /* Shift left one word to put the even word in the correct location */
2134 rtx rtx_val = GEN_INT (4);
2136 rtx_tmp = gen_reg_rtx (V4SFmode);
2137 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2138 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2139 rtx_tmp, rtx_tmp, rtx_val));
2142 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2147 ;; Generate uns_floate
2148 ;; convert long long unsigned to float
2149 ;; (Only even words are valid, BE numbering)
;; Unsigned twin of floate<mode>; uses xvcvuxdsp instead of the iterator form.
2150 (define_expand "unsfloatev2di"
2151 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2152 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2153 "VECTOR_UNIT_VSX_P (V4SFmode)"
2155 if (VECTOR_ELT_ORDER_BIG)
2157 /* Shift left one word to put the even word in the correct location */
2159 rtx rtx_val = GEN_INT (4);
2161 rtx_tmp = gen_reg_rtx (V4SFmode);
2162 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2163 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2164 rtx_tmp, rtx_tmp, rtx_val));
2167 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2173 ;; convert double or long long signed to float
2174 ;; (Only odd words are valid, BE numbering)
;; Mirror of floate<mode>: here the BE path can use the conversion result
;; directly and the LE path needs the vsldoi word rotation.
2175 (define_expand "floato<mode>"
2176 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2177 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2178 "VECTOR_UNIT_VSX_P (V4SFmode)"
2180 if (VECTOR_ELT_ORDER_BIG)
2181 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2184 /* Shift left one word to put the odd word in the correct location */
2186 rtx rtx_val = GEN_INT (4);
2188 rtx_tmp = gen_reg_rtx (V4SFmode);
2189 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2190 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2191 rtx_tmp, rtx_tmp, rtx_val));
2196 ;; Generate uns_floato
2197 ;; convert long long unsigned to float
2198 ;; (Only odd words are valid, BE numbering)
;; Unsigned twin of floato<mode>; uses xvcvuxdsp.
2199 (define_expand "unsfloatov2di"
2200 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2201 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2202 "VECTOR_UNIT_VSX_P (V4SFmode)"
2204 if (VECTOR_ELT_ORDER_BIG)
2205 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]))
2208 /* Shift left one word to put the odd word in the correct location */
2210 rtx rtx_val = GEN_INT (4);
2212 rtx_tmp = gen_reg_rtx (V4SFmode);
2213 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2214 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2215 rtx_tmp, rtx_tmp, rtx_val));
2220 ;; Generate vsigned2
2221 ;; convert two double float vectors to a vector of single precision ints
;; Delegates to rs6000_generate_vsigned2_code with signed_convert = true.
2222 (define_expand "vsigned2_v2df"
2223 [(match_operand:V4SI 0 "register_operand" "=wa")
2224 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2225 (match_operand:V2DF 2 "register_operand" "wa")]
2226 UNSPEC_VSX_VSIGNED2)]
2229 rtx rtx_src1, rtx_src2, rtx_dst;
2230 bool signed_convert=true;
2232 rtx_dst = operands[0];
2233 rtx_src1 = operands[1];
2234 rtx_src2 = operands[2];
2236 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2240 ;; Generate vsignedo_v2df
2241 ;; signed double float to int convert odd word
;; xvcvdpsxws leaves results in words 0/2 (BE numbering); on BE a 12-byte
;; vsldoi rotation moves them to the odd positions, on LE no fixup is needed.
2242 (define_expand "vsignedo_v2df"
2243 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2244 (match_operand:V2DF 1 "register_operand" "wa"))]
2247 if (VECTOR_ELT_ORDER_BIG)
2250 rtx rtx_val = GEN_INT (12);
2251 rtx_tmp = gen_reg_rtx (V4SImode);
2253 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2255 /* Big endian word numbering for words in operand is 0 1 2 3.
2256 take (operand[1] operand[1]) and shift left one word
2257 0 1 2 3 0 1 2 3 => 1 2 3 0
2258 Words 1 and 3 are now where they need to be for the result. */
2260 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2264 /* Little endian word numbering for operand is 3 2 1 0.
2265 Result words 3 and 1 are where they need to be. */
2266 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2270 [(set_attr "type" "veccomplex")])
2272 ;; Generate vsignede_v2df
2273 ;; signed double float to int even word
;; Even-word counterpart: BE needs no fixup, LE rotates by three words.
2274 (define_expand "vsignede_v2df"
2275 [(set (match_operand:V4SI 0 "register_operand" "=v")
2276 (match_operand:V2DF 1 "register_operand" "v"))]
2279 if (VECTOR_ELT_ORDER_BIG)
2280 /* Big endian word numbering for words in operand is 0 1
2281 Result word 0 is where it needs to be. */
2282 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2287 rtx rtx_val = GEN_INT (12);
2288 rtx_tmp = gen_reg_rtx (V4SImode);
2290 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2292 /* Little endian word numbering for operand is 3 2 1 0.
2293 take (operand[1] operand[1]) and shift left three words
2294 0 1 2 3 0 1 2 3 => 3 0 1 2
2295 Words 0 and 2 are now where they need to be for the result. */
2296 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2301 [(set_attr "type" "veccomplex")])
2303 ;; Generate vunsigned2
2304 ;; convert two double float vectors to a vector of single precision
2306 ;; unsigned ints.  NOTE(review): this reuses UNSPEC_VSX_VSIGNED2 and only
2306 ;; differs from vsigned2_v2df by passing signed_convert = false.
2306 (define_expand "vunsigned2_v2df"
2307 [(match_operand:V4SI 0 "register_operand" "=v")
2308 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2309 (match_operand:V2DF 2 "register_operand" "v")]
2310 UNSPEC_VSX_VSIGNED2)]
2313 rtx rtx_src1, rtx_src2, rtx_dst;
2314 bool signed_convert=false;
2316 rtx_dst = operands[0];
2317 rtx_src1 = operands[1];
2318 rtx_src2 = operands[2];
2320 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2324 ;; Generate vunsignedo_v2df
2325 ;; unsigned double float to int convert odd word
;; Unsigned twin of vsignedo_v2df; uses xvcvdpuxws.
2326 (define_expand "vunsignedo_v2df"
2327 [(set (match_operand:V4SI 0 "register_operand" "=v")
2328 (match_operand:V2DF 1 "register_operand" "v"))]
2331 if (VECTOR_ELT_ORDER_BIG)
2334 rtx rtx_val = GEN_INT (12);
2335 rtx_tmp = gen_reg_rtx (V4SImode);
2337 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2339 /* Big endian word numbering for words in operand is 0 1 2 3.
2340 take (operand[1] operand[1]) and shift left one word
2341 0 1 2 3 0 1 2 3 => 1 2 3 0
2342 Words 1 and 3 are now where they need to be for the result. */
2344 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2348 /* Little endian word numbering for operand is 3 2 1 0.
2349 Result words 3 and 1 are where they need to be. */
2350 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2354 [(set_attr "type" "veccomplex")])
2356 ;; Generate vunsignede_v2df
2357 ;; unsigned double float to int even word
;; Unsigned twin of vsignede_v2df; uses xvcvdpuxws.
2358 (define_expand "vunsignede_v2df"
2359 [(set (match_operand:V4SI 0 "register_operand" "=v")
2360 (match_operand:V2DF 1 "register_operand" "v"))]
2363 if (VECTOR_ELT_ORDER_BIG)
2364 /* Big endian word numbering for words in operand is 0 1
2365 Result word 0 is where it needs to be. */
2366 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2371 rtx rtx_val = GEN_INT (12);
2372 rtx_tmp = gen_reg_rtx (V4SImode);
2374 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2376 /* Little endian word numbering for operand is 3 2 1 0.
2377 take (operand[1] operand[1]) and shift left three words
2378 0 1 2 3 0 1 2 3 => 3 0 1 2
2379 Words 0 and 2 are now where they need to be for the result. */
2380 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2385 [(set_attr "type" "veccomplex")])
2387 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2388 ;; since the xvrdpiz instruction does not truncate the value if the floating
2389 ;; point value is < LONG_MIN or > LONG_MAX.
;; Combiner pattern: fold a V2DF round-trip through fixed point into a single
;; round-toward-zero; guarded by the unsafe-math/!trapping/TARGET_FRIZ checks.
2390 (define_insn "*vsx_float_fix_v2df2"
2391 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2394 (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
2395 "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
2396 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2397 && !flag_trapping_math && TARGET_FRIZ"
2399 [(set_attr "type" "vecdouble")
2400 (set_attr "fp_type" "fp_addsub_d")])
2403 ;; Permute operations
2405 ;; Build a V2DF/V2DI vector from two scalars
;; Alternative 0 uses xxpermdi on VSX registers; alternative 1 uses mtvsrdd
;; from GPRs.  Operand order is swapped on little endian in both cases.
2406 (define_insn "vsx_concat_<mode>"
2407 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2409 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2410 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2411 "VECTOR_MEM_VSX_P (<MODE>mode)"
2413 if (which_alternative == 0)
2414 return (BYTES_BIG_ENDIAN
2415 ? "xxpermdi %x0,%x1,%x2,0"
2416 : "xxpermdi %x0,%x2,%x1,0");
2418 else if (which_alternative == 1)
2419 return (BYTES_BIG_ENDIAN
2420 ? "mtvsrdd %x0,%1,%2"
2421 : "mtvsrdd %x0,%2,%1");
2426 [(set_attr "type" "vecperm")])
2428 ;; Combiner patterns to allow creating XXPERMDI's to access either double
2429 ;; word element in a vector register.
;; _1: selected doubleword of operand 1 in the high half, operand 3 low.
2430 (define_insn "*vsx_concat_<mode>_1"
2431 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2433 (vec_select:<VS_scalar>
2434 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2435 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2436 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2437 "VECTOR_MEM_VSX_P (<MODE>mode)"
2439 HOST_WIDE_INT dword = INTVAL (operands[2]);
2440 if (BYTES_BIG_ENDIAN)
2442 operands[4] = GEN_INT (2*dword);
2443 return "xxpermdi %x0,%x1,%x3,%4";
2447 operands[4] = GEN_INT (!dword);
2448 return "xxpermdi %x0,%x3,%x1,%4";
2451 [(set_attr "type" "vecperm")])
;; _2: operand 1 high, selected doubleword of operand 2 low.
2453 (define_insn "*vsx_concat_<mode>_2"
2454 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2456 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2457 (vec_select:<VS_scalar>
2458 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2459 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2460 "VECTOR_MEM_VSX_P (<MODE>mode)"
2462 HOST_WIDE_INT dword = INTVAL (operands[3]);
2463 if (BYTES_BIG_ENDIAN)
2465 operands[4] = GEN_INT (dword);
2466 return "xxpermdi %x0,%x1,%x2,%4";
2470 operands[4] = GEN_INT (2 * !dword);
2471 return "xxpermdi %x0,%x2,%x1,%4";
2474 [(set_attr "type" "vecperm")])
;; _3: both halves are selected doublewords; the xxpermdi DM field is built
;; from both selectors (inverted and swapped on little endian).
2476 (define_insn "*vsx_concat_<mode>_3"
2477 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2479 (vec_select:<VS_scalar>
2480 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2481 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2482 (vec_select:<VS_scalar>
2483 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2484 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2485 "VECTOR_MEM_VSX_P (<MODE>mode)"
2487 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2488 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2489 if (BYTES_BIG_ENDIAN)
2491 operands[5] = GEN_INT ((2 * dword1) + dword2);
2492 return "xxpermdi %x0,%x1,%x3,%5";
2496 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2497 return "xxpermdi %x0,%x3,%x1,%5";
2500 [(set_attr "type" "vecperm")])
2502 ;; Special purpose concat using xxpermdi to glue two single precision values
2503 ;; together, relying on the fact that internally scalar floats are represented
2504 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
2505 (define_insn "vsx_concat_v2sf"
2506 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2508 [(match_operand:SF 1 "vsx_register_operand" "ww")
2509 (match_operand:SF 2 "vsx_register_operand" "ww")]
2510 UNSPEC_VSX_CONCAT))]
2511 "VECTOR_MEM_VSX_P (V2DFmode)"
;; Same endian-dependent operand swap as vsx_concat_<mode>, alternative 0.
2513 if (BYTES_BIG_ENDIAN)
2514 return "xxpermdi %x0,%x1,%x2,0";
2516 return "xxpermdi %x0,%x2,%x1,0";
2518 [(set_attr "type" "vecperm")])
2520 ;; V4SImode initialization splitter
;; Builds a V4SI from four SImode values/constants via two DI scratch GPRs;
;; the actual instruction sequence is emitted after reload by
;; rs6000_split_v4si_init.
2521 (define_insn_and_split "vsx_init_v4si"
2522 [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
2524 [(match_operand:SI 1 "reg_or_cint_operand" "rn")
2525 (match_operand:SI 2 "reg_or_cint_operand" "rn")
2526 (match_operand:SI 3 "reg_or_cint_operand" "rn")
2527 (match_operand:SI 4 "reg_or_cint_operand" "rn")]
2528 UNSPEC_VSX_VEC_INIT))
2529 (clobber (match_scratch:DI 5 "=&r"))
2530 (clobber (match_scratch:DI 6 "=&r"))]
2531 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2533 "&& reload_completed"
2536 rs6000_split_v4si_init (operands);
2540 ;; xxpermdi for little endian loads and stores.  We need several of
2541 ;; these since the form of the PARALLEL differs by mode.
;; All four patterns emit the same doubleword-swap (xxpermdi ...,2); only the
;; vec_select PARALLEL describing the element permutation differs per mode.
2542 (define_insn "*vsx_xxpermdi2_le_<mode>"
2543 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
2545 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
2546 (parallel [(const_int 1) (const_int 0)])))]
2547 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2548 "xxpermdi %x0,%x1,%x1,2"
2549 [(set_attr "type" "vecperm")])
2551 (define_insn "*vsx_xxpermdi4_le_<mode>"
2552 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
2554 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
2555 (parallel [(const_int 2) (const_int 3)
2556 (const_int 0) (const_int 1)])))]
2557 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2558 "xxpermdi %x0,%x1,%x1,2"
2559 [(set_attr "type" "vecperm")])
2561 (define_insn "*vsx_xxpermdi8_le_V8HI"
2562 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2564 (match_operand:V8HI 1 "vsx_register_operand" "wa")
2565 (parallel [(const_int 4) (const_int 5)
2566 (const_int 6) (const_int 7)
2567 (const_int 0) (const_int 1)
2568 (const_int 2) (const_int 3)])))]
2569 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
2570 "xxpermdi %x0,%x1,%x1,2"
2571 [(set_attr "type" "vecperm")])
2573 (define_insn "*vsx_xxpermdi16_le_V16QI"
2574 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2576 (match_operand:V16QI 1 "vsx_register_operand" "wa")
2577 (parallel [(const_int 8) (const_int 9)
2578 (const_int 10) (const_int 11)
2579 (const_int 12) (const_int 13)
2580 (const_int 14) (const_int 15)
2581 (const_int 0) (const_int 1)
2582 (const_int 2) (const_int 3)
2583 (const_int 4) (const_int 5)
2584 (const_int 6) (const_int 7)])))]
2585 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
2586 "xxpermdi %x0,%x1,%x1,2"
2587 [(set_attr "type" "vecperm")])
2589 ;; lxvd2x for little endian loads.  We need several of
2590 ;; these since the form of the PARALLEL differs by mode.
;; Disabled on ISA 3.0 (!TARGET_P9_VECTOR), which has endian-correct loads.
2591 (define_insn "*vsx_lxvd2x2_le_<mode>"
2592 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
2594 (match_operand:VSX_D 1 "memory_operand" "Z")
2595 (parallel [(const_int 1) (const_int 0)])))]
2596 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2598 [(set_attr "type" "vecload")])
2600 (define_insn "*vsx_lxvd2x4_le_<mode>"
2601 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
2603 (match_operand:VSX_W 1 "memory_operand" "Z")
2604 (parallel [(const_int 2) (const_int 3)
2605 (const_int 0) (const_int 1)])))]
2606 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2608 [(set_attr "type" "vecload")])
2610 (define_insn "*vsx_lxvd2x8_le_V8HI"
2611 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2613 (match_operand:V8HI 1 "memory_operand" "Z")
2614 (parallel [(const_int 4) (const_int 5)
2615 (const_int 6) (const_int 7)
2616 (const_int 0) (const_int 1)
2617 (const_int 2) (const_int 3)])))]
2618 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
2620 [(set_attr "type" "vecload")])
2622 (define_insn "*vsx_lxvd2x16_le_V16QI"
2623 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2625 (match_operand:V16QI 1 "memory_operand" "Z")
2626 (parallel [(const_int 8) (const_int 9)
2627 (const_int 10) (const_int 11)
2628 (const_int 12) (const_int 13)
2629 (const_int 14) (const_int 15)
2630 (const_int 0) (const_int 1)
2631 (const_int 2) (const_int 3)
2632 (const_int 4) (const_int 5)
2633 (const_int 6) (const_int 7)])))]
2634 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
2636 [(set_attr "type" "vecload")])
2638 ;; stxvd2x for little endian stores.  We need several of
2639 ;; these since the form of the PARALLEL differs by mode.
;; Store-side mirror of the lxvd2x_le patterns above; likewise pre-ISA 3.0.
2640 (define_insn "*vsx_stxvd2x2_le_<mode>"
2641 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
2643 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
2644 (parallel [(const_int 1) (const_int 0)])))]
2645 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2647 [(set_attr "type" "vecstore")])
2649 (define_insn "*vsx_stxvd2x4_le_<mode>"
2650 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
2652 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
2653 (parallel [(const_int 2) (const_int 3)
2654 (const_int 0) (const_int 1)])))]
2655 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2657 [(set_attr "type" "vecstore")])
2659 (define_insn "*vsx_stxvd2x8_le_V8HI"
2660 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
2662 (match_operand:V8HI 1 "vsx_register_operand" "wa")
2663 (parallel [(const_int 4) (const_int 5)
2664 (const_int 6) (const_int 7)
2665 (const_int 0) (const_int 1)
2666 (const_int 2) (const_int 3)])))]
2667 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
2669 [(set_attr "type" "vecstore")])
2671 (define_insn "*vsx_stxvd2x16_le_V16QI"
2672 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
2674 (match_operand:V16QI 1 "vsx_register_operand" "wa")
2675 (parallel [(const_int 8) (const_int 9)
2676 (const_int 10) (const_int 11)
2677 (const_int 12) (const_int 13)
2678 (const_int 14) (const_int 15)
2679 (const_int 0) (const_int 1)
2680 (const_int 2) (const_int 3)
2681 (const_int 4) (const_int 5)
2682 (const_int 6) (const_int 7)])))]
2683 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
2685 [(set_attr "type" "vecstore")])
2687 ;; Convert a TImode value into V1TImode
;; Element index must be 0 for the single-element vector; the set is then
;; just a lowpart move.
2688 (define_expand "vsx_set_v1ti"
2689 [(match_operand:V1TI 0 "nonimmediate_operand" "")
2690 (match_operand:V1TI 1 "nonimmediate_operand" "")
2691 (match_operand:TI 2 "input_operand" "")
2692 (match_operand:QI 3 "u5bit_cint_operand" "")]
2693 "VECTOR_MEM_VSX_P (V1TImode)"
2695 if (operands[3] != const0_rtx)
2698 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
2702 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
;; Extracts the element being kept from the old vector, then concatenates it
;; with the new value in the order implied by the element index.
2703 (define_expand "vsx_set_<mode>"
2704 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
2705 (use (match_operand:VSX_D 1 "vsx_register_operand"))
2706 (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
2707 (use (match_operand:QI 3 "const_0_to_1_operand"))]
2708 "VECTOR_MEM_VSX_P (<MODE>mode)"
2710 rtx dest = operands[0];
2711 rtx vec_reg = operands[1];
2712 rtx value = operands[2];
2713 rtx ele = operands[3];
2714 rtx tmp = gen_reg_rtx (<VS_scalar>mode);
2716 if (ele == const0_rtx)
2718 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
2719 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
2722 else if (ele == const1_rtx)
2724 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
2725 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
2732 ;; Extract a DF/DI element from V2DF/V2DI
2733 ;; Optimize cases where we can do a simple or direct move.
2734 ;; Or see if we can avoid doing the move at all
2736 ;; There are some unresolved problems with reload that show up if an Altivec
2737 ;; register was picked.  Limit the scalar value to FPRs for now.
;; Chooses at output time between: no insn (same register), mfvsrd/mfvsrld
;; direct moves to a GPR, xxlor register copy, or an xxpermdi extract.
2739 (define_insn "vsx_extract_<mode>"
2740 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
2742 (vec_select:<VS_scalar>
2743 (match_operand:VSX_D 1 "gpc_reg_operand" "<VSa>, <VSa>, wm, wo")
2746 [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
2747 "VECTOR_MEM_VSX_P (<MODE>mode)"
2749 int element = INTVAL (operands[2]);
2750 int op0_regno = REGNO (operands[0]);
2751 int op1_regno = REGNO (operands[1]);
2754 gcc_assert (IN_RANGE (element, 0, 1));
2755 gcc_assert (VSX_REGNO_P (op1_regno));
2757 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
2759 if (op0_regno == op1_regno)
2760 return ASM_COMMENT_START " vec_extract to same register";
2762 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
2763 && TARGET_POWERPC64)
2764 return "mfvsrd %0,%x1";
2766 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
2769 else if (VSX_REGNO_P (op0_regno))
2770 return "xxlor %x0,%x1,%x1";
2776 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
2777 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
2778 return "mfvsrld %0,%x1";
2780 else if (VSX_REGNO_P (op0_regno))
2782 fldDM = element << 1;
2783 if (!BYTES_BIG_ENDIAN)
2785 operands[3] = GEN_INT (fldDM);
2786 return "xxpermdi %x0,%x1,%x1,%3";
2792 [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
2794 ;; Optimize extracting a single scalar element from memory.
;; Splits after reload into a scalar load from the element's adjusted address
;; (computed by rs6000_adjust_vec_address using the base-reg scratch).
2795 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
2796 [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
2797 (vec_select:<VSX_D:VS_scalar>
2798 (match_operand:VSX_D 1 "memory_operand" "m,m")
2799 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
2800 (clobber (match_scratch:P 3 "=&b,&b"))]
2801 "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
2803 "&& reload_completed"
2804 [(set (match_dup 0) (match_dup 4))]
2806 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
2807 operands[3], <VSX_D:VS_scalar>mode);
2809 [(set_attr "type" "fpload,load")
2810 (set_attr "length" "8")])
2812 ;; Optimize storing a single scalar element that is the right location to
2813 ;; memory
2814 (define_insn "*vsx_extract_<mode>_store"
2815 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
2816 (vec_select:<VS_scalar>
2817 (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
2818 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
2819 "VECTOR_MEM_VSX_P (<MODE>mode)"
2824 [(set_attr "type" "fpstore")
2825 (set_attr "length" "4")])
2827 ;; Variable V2DI/V2DF extract shift
;; Helper insn used by the variable-extract splitter below.
2828 (define_insn "vsx_vslo_<mode>"
2829 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
2830 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
2831 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
2833 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2835 [(set_attr "type" "vecperm")])
2837 ;; Variable V2DI/V2DF extract
;; Run-time element index in a GPR; after reload the split hands everything
;; to rs6000_split_vec_extract_var, which emits the shift/move sequence.
2838 (define_insn_and_split "vsx_extract_<mode>_var"
2839 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
2840 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
2841 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
2842 UNSPEC_VSX_EXTRACT))
2843 (clobber (match_scratch:DI 3 "=r,&b,&b"))
2844 (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
2845 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2847 "&& reload_completed"
2850 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
2851 operands[3], operands[4]);
2855 ;; Extract a SF element from V4SF
;; Rotates the wanted word into position with xxsldwi (skipped when already
;; in place), then converts the scalar with xscvspdp.
2856 (define_insn_and_split "vsx_extract_v4sf"
2857 [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
2859 (match_operand:V4SF 1 "vsx_register_operand" "wa")
2860 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
2861 (clobber (match_scratch:V4SF 3 "=0"))]
2862 "VECTOR_UNIT_VSX_P (V4SFmode)"
2867 rtx op0 = operands[0];
2868 rtx op1 = operands[1];
2869 rtx op2 = operands[2];
2870 rtx op3 = operands[3];
2872 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
2878 if (GET_CODE (op3) == SCRATCH)
2879 op3 = gen_reg_rtx (V4SFmode);
2880 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
2883 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
2886 [(set_attr "length" "8")
2887 (set_attr "type" "fp")])
;; Memory form: split after reload into a scalar SF load from the adjusted
;; element address.
2889 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
2890 [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
2892 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
2893 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
2894 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
2895 "VECTOR_MEM_VSX_P (V4SFmode)"
2897 "&& reload_completed"
2898 [(set (match_dup 0) (match_dup 4))]
2900 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
2901 operands[3], SFmode);
2903 [(set_attr "type" "fpload,fpload,fpload,load")
2904 (set_attr "length" "8")])
2906 ;; Variable V4SF extract
;; Variable-index form, delegated to rs6000_split_vec_extract_var.
2907 (define_insn_and_split "vsx_extract_v4sf_var"
2908 [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
2909 (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
2910 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
2911 UNSPEC_VSX_EXTRACT))
2912 (clobber (match_scratch:DI 3 "=r,&b,&b"))
2913 (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
2914 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
2916 "&& reload_completed"
2919 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
2920 operands[3], operands[4]);
2924 ;; Expand the builtin form of xxpermdi to canonical rtl.
;; Decodes the 2-bit mask into the two doubleword selectors expected by
;; vsx_xxpermdi2_<mode>_1; non-V2DF/V2DI modes go through V2DI lowparts.
2925 (define_expand "vsx_xxpermdi_<mode>"
2926 [(match_operand:VSX_L 0 "vsx_register_operand")
2927 (match_operand:VSX_L 1 "vsx_register_operand")
2928 (match_operand:VSX_L 2 "vsx_register_operand")
2929 (match_operand:QI 3 "u5bit_cint_operand")]
2930 "VECTOR_MEM_VSX_P (<MODE>mode)"
2932 rtx target = operands[0];
2933 rtx op0 = operands[1];
2934 rtx op1 = operands[2];
2935 int mask = INTVAL (operands[3]);
2936 rtx perm0 = GEN_INT ((mask >> 1) & 1);
2937 rtx perm1 = GEN_INT ((mask & 1) + 2);
2938 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
2940 if (<MODE>mode == V2DFmode)
2941 gen = gen_vsx_xxpermdi2_v2df_1;
2944 gen = gen_vsx_xxpermdi2_v2di_1;
2945 if (<MODE>mode != V2DImode)
2947 target = gen_lowpart (V2DImode, target);
2948 op0 = gen_lowpart (V2DImode, op0);
2949 op1 = gen_lowpart (V2DImode, op1);
2952 emit_insn (gen (target, op0, op1, perm0, perm1));
2956 ;; Special version of xxpermdi that retains big-endian semantics.
2957 (define_expand "vsx_xxpermdi_<mode>_be"
2958 [(match_operand:VSX_L 0 "vsx_register_operand")
2959 (match_operand:VSX_L 1 "vsx_register_operand")
2960 (match_operand:VSX_L 2 "vsx_register_operand")
2961 (match_operand:QI 3 "u5bit_cint_operand")]
2962 "VECTOR_MEM_VSX_P (<MODE>mode)"
2964 rtx target = operands[0];
2965 rtx op0 = operands[1];
2966 rtx op1 = operands[2];
2967 int mask = INTVAL (operands[3]);
2968 rtx perm0 = GEN_INT ((mask >> 1) & 1);
2969 rtx perm1 = GEN_INT ((mask & 1) + 2);
2970 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
2972 if (<MODE>mode == V2DFmode)
2973 gen = gen_vsx_xxpermdi2_v2df_1;
2976 gen = gen_vsx_xxpermdi2_v2di_1;
2977 if (<MODE>mode != V2DImode)
2979 target = gen_lowpart (V2DImode, target);
2980 op0 = gen_lowpart (V2DImode, op0);
2981 op1 = gen_lowpart (V2DImode, op1);
2984 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
2985 transformation we don't want; it is necessary for
2986 rs6000_expand_vec_perm_const_1 but not for this use.  So we
2987 prepare for that by reversing the transformation here.  */
2988 if (BYTES_BIG_ENDIAN)
2989 emit_insn (gen (target, op0, op1, perm0, perm1));
2992 rtx p0 = GEN_INT (3 - INTVAL (perm1));
2993 rtx p1 = GEN_INT (3 - INTVAL (perm0));
2994 emit_insn (gen (target, op1, op0, p0, p1));
;; Core xxpermdi insn: vec_select over a concat of two V2DF/V2DI operands.
2999 (define_insn "vsx_xxpermdi2_<mode>_1"
3000 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
3002 (vec_concat:<VS_double>
3003 (match_operand:VSX_D 1 "vsx_register_operand" "wd")
3004 (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
3005 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3006 (match_operand 4 "const_2_to_3_operand" "")])))]
3007 "VECTOR_MEM_VSX_P (<MODE>mode)"
3011 /* For little endian, swap operands and invert/swap selectors
3012 to get the correct xxpermdi.  The operand swap sets up the
3013 inputs as a little endian array.  The selectors are swapped
3014 because they are defined to use big endian ordering.  The
3015 selectors are inverted to get the correct doublewords for
3016 little endian ordering.  */
3017 if (BYTES_BIG_ENDIAN)
3019 op3 = INTVAL (operands[3]);
3020 op4 = INTVAL (operands[4]);
3024 op3 = 3 - INTVAL (operands[4]);
3025 op4 = 3 - INTVAL (operands[3]);
3028 mask = (op3 << 1) | (op4 - 2);
3029 operands[3] = GEN_INT (mask);
3031 if (BYTES_BIG_ENDIAN)
3032 return "xxpermdi %x0,%x1,%x2,%3";
3034 return "xxpermdi %x0,%x2,%x1,%3";
3036 [(set_attr "type" "vecperm")])
;; Standard-named constant-permute expander; rs6000_expand_vec_perm_const
;; either emits a sequence or the expander FAILs (fallthrough not shown here).
3038 (define_expand "vec_perm_const<mode>"
3039 [(match_operand:VSX_D 0 "vsx_register_operand" "")
3040 (match_operand:VSX_D 1 "vsx_register_operand" "")
3041 (match_operand:VSX_D 2 "vsx_register_operand" "")
3042 (match_operand:V2DI 3 "" "")]
3043 "VECTOR_MEM_VSX_P (<MODE>mode)"
3045 if (rs6000_expand_vec_perm_const (operands))
3051 ;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
3052 ;; none of the small types were allowed in a vector register, so we had to
3053 ;; extract to a DImode and either do a direct move or store.
3054 (define_expand "vsx_extract_<mode>"
3055 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3056 (vec_select:<VS_scalar>
3057 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3058 (parallel [(match_operand:QI 2 "const_int_operand")])))
3059 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3060 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3062 /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
3063 if (TARGET_P9_VECTOR)
3065 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
;; ISA 3.0 form: xxextractuw into a GPR path or vextractu{b,h} within
;; Altivec registers; the byte offset is scaled from the element number.
3071 (define_insn "vsx_extract_<mode>_p9"
3072 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3073 (vec_select:<VS_scalar>
3074 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3075 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3076 (clobber (match_scratch:SI 3 "=r,X"))]
3077 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3079 if (which_alternative == 0)
3084 HOST_WIDE_INT elt = INTVAL (operands[2]);
3085 HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
3086 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3089 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3090 HOST_WIDE_INT offset = unit_size * elt_adj;
3092 operands[2] = GEN_INT (offset);
3094 return "xxextractuw %x0,%x1,%2";
3096 return "vextractu<wd> %0,%1,%2";
3099 [(set_attr "type" "vecsimple")])
;; NOTE(review): the (define_split header line for the following pattern
;; appears to be missing from this extract; the body splits an Altivec-reg
;; extract into vextu<wd>lx / vextu<wd>rx using a GPR byte offset.
3102 [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3103 (vec_select:<VS_scalar>
3104 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3105 (parallel [(match_operand:QI 2 "const_int_operand")])))
3106 (clobber (match_operand:SI 3 "int_reg_operand"))]
3107 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3110 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3111 rtx op1 = operands[1];
3112 rtx op2 = operands[2];
3113 rtx op3 = operands[3];
3114 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3116 emit_move_insn (op3, GEN_INT (offset));
3117 if (VECTOR_ELT_ORDER_BIG)
3118 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3120 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3124 ;; Optimize zero extracts to eliminate the AND after the extract.
;; Re-splits the DI zero-extend form into the plain vsx_extract_<mode>_p9
;; pattern on the low part of the destination register.
3125 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3126 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3128 (vec_select:<VS_scalar>
3129 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3130 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3131 (clobber (match_scratch:SI 3 "=r,X"))]
3132 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3134 "&& reload_completed"
3135 [(parallel [(set (match_dup 4)
3136 (vec_select:<VS_scalar>
3138 (parallel [(match_dup 2)])))
3139 (clobber (match_dup 3))])]
3141 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3144 ;; Optimize stores to use the ISA 3.0 scalar store instructions
;; Splits into an extract to a scratch register followed by the store.
3145 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3146 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3147 (vec_select:<VS_scalar>
3148 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3149 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3150 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3151 (clobber (match_scratch:SI 4 "=X,&r"))]
3152 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3154 "&& reload_completed"
3155 [(parallel [(set (match_dup 3)
3156 (vec_select:<VS_scalar>
3158 (parallel [(match_dup 2)])))
3159 (clobber (match_dup 4))])
;; Pre-ISA 3.0 V4SI extract: vspltw the wanted word into a scratch, then
;; move it to a GPR/VSX register or store it (stfiwx when no P8 vector).
3163 (define_insn_and_split "*vsx_extract_si"
3164 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
3166 (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
3167 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3168 (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
3169 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3171 "&& reload_completed"
3174 rtx dest = operands[0];
3175 rtx src = operands[1];
3176 rtx element = operands[2];
3177 rtx vec_tmp = operands[3];
3180 if (!VECTOR_ELT_ORDER_BIG)
3181 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3183 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3185 value = INTVAL (element);
3187 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3191 if (MEM_P (operands[0]))
3193 if (can_create_pseudo_p ())
3194 dest = rs6000_address_for_fpconvert (dest);
3196 if (TARGET_P8_VECTOR)
3197 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3199 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3202 else if (TARGET_P8_VECTOR)
3203 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3205 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3206 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3210 [(set_attr "type" "mftgpr,vecperm,fpstore")
3211 (set_attr "length" "8")])
;; Extract a QImode/HImode element of a vector into a GPR on power8
;; (direct move available, but no ISA 3.0 VEXTRACT*).  Uses VSPLTB or
;; VSPLTH to put the element into the canonical position, then a direct
;; move from the scratch vector register to the GPR.
3213 (define_insn_and_split "*vsx_extract_<mode>_p8"
3214 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3215 (vec_select:<VS_scalar>
3216 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3217 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3218 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3219 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3220 && !TARGET_P9_VECTOR"
3222 "&& reload_completed"
3225 rtx dest = operands[0];
3226 rtx src = operands[1];
3227 rtx element = operands[2];
3228 rtx vec_tmp = operands[3];
;; Adjust the element number for little-endian element ordering.
3231 if (!VECTOR_ELT_ORDER_BIG)
3232 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3234 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3236 value = INTVAL (element);
3237 if (<MODE>mode == V16QImode)
3240 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3244 else if (<MODE>mode == V8HImode)
3247 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
;; Direct move the (zero-extended) element from the VSX register to the GPR.
3254 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3255 gen_rtx_REG (DImode, REGNO (vec_tmp)))
3258 [(set_attr "type" "mftgpr")])
3260 ;; Optimize extracting a single scalar element from memory.
;; Instead of loading the whole vector, compute the address of the one
;; element (rs6000_adjust_vec_address, using DImode scratch operand 3
;; as a base register) and do a scalar load into the GPR.
3261 (define_insn_and_split "*vsx_extract_<mode>_load"
3262 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3263 (vec_select:<VS_scalar>
3264 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3265 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3266 (clobber (match_scratch:DI 3 "=&b"))]
3267 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3269 "&& reload_completed"
3270 [(set (match_dup 0) (match_dup 4))]
3272 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3273 operands[3], <VS_scalar>mode);
3275 [(set_attr "type" "load")
3276 (set_attr "length" "8")])
3278 ;; Variable V16QI/V8HI/V4SI extract
;; Extract an element whose index is in a register (not a constant).
;; The real work is done after reload by rs6000_split_vec_extract_var,
;; using a DImode scratch (operand 3) and, for the AltiVec-register
;; alternative, a V2DI vector scratch (operand 4).
3279 (define_insn_and_split "vsx_extract_<mode>_var"
3280 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3282 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3283 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3284 UNSPEC_VSX_EXTRACT))
3285 (clobber (match_scratch:DI 3 "=r,r,&b"))
3286 (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3287 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3289 "&& reload_completed"
3292 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3293 operands[3], operands[4]);
;; Variable extract combined with a zero extension to SImode/DImode.
;; Same split as vsx_extract_<mode>_var, but the destination register is
;; re-viewed in the narrow element mode before calling the splitter, so
;; the zero extension comes for free from the extract sequence.
3297 (define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
3298 [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
3300 (unspec:<VSX_EXTRACT_I:VS_scalar>
3301 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3302 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3303 UNSPEC_VSX_EXTRACT)))
3304 (clobber (match_scratch:DI 3 "=r,r,&b"))
3305 (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3306 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3308 "&& reload_completed"
3311 machine_mode smode = <VSX_EXTRACT_I:MODE>mode;
3312 rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
3313 operands[1], operands[2],
3314 operands[3], operands[4]);
3318 ;; VSX_EXTRACT optimizations
3319 ;; Optimize double d = (double) vec_extract (vi, <n>)
3320 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
3321 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
3322 [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
3325 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3326 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3327 (clobber (match_scratch:V4SI 3 "=v"))]
3328 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3333 rtx dest = operands[0];
3334 rtx src = operands[1];
3335 rtx element = operands[2];
3336 rtx v4si_tmp = operands[3];
;; Adjust the element number for little-endian element ordering.
3339 if (!VECTOR_ELT_ORDER_BIG)
3340 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3342 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3344 value = INTVAL (element);
;; If the split runs before reload the scratch may still be a SCRATCH;
;; replace it with a fresh pseudo in that case.
3347 if (GET_CODE (v4si_tmp) == SCRATCH)
3348 v4si_tmp = gen_reg_rtx (V4SImode);
3349 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
;; Convert the (signed or unsigned) word in the scalar position to DF.
3354 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
3358 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
3359 ;; where <type> is a floating point type that is supported by the hardware
3360 ;; and is not double.  First convert the value to double, and then to the
;; desired type.
3362 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3363 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
3364 (any_float:VSX_EXTRACT_FL
3366 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3367 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3368 (clobber (match_scratch:V4SI 3 "=v"))
3369 (clobber (match_scratch:DF 4 "=ws"))]
3370 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3375 rtx dest = operands[0];
3376 rtx src = operands[1];
3377 rtx element = operands[2];
3378 rtx v4si_tmp = operands[3];
3379 rtx df_tmp = operands[4];
;; Adjust the element number for little-endian element ordering.
3382 if (!VECTOR_ELT_ORDER_BIG)
3383 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3385 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3387 value = INTVAL (element);
;; Replace SCRATCH rtxes with pseudos if splitting before reload.
3390 if (GET_CODE (v4si_tmp) == SCRATCH)
3391 v4si_tmp = gen_reg_rtx (V4SImode);
3392 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3397 if (GET_CODE (df_tmp) == SCRATCH)
3398 df_tmp = gen_reg_rtx (DFmode);
;; First convert the integer element to double precision...
3400 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
;; ...then narrow or widen the double to the requested float type,
;; picking the generator that matches the target mode and float format.
3402 if (<MODE>mode == SFmode)
3403 emit_insn (gen_truncdfsf2 (dest, df_tmp));
3404 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3405 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3406 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3407 && TARGET_FLOAT128_HW)
3408 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3409 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3410 emit_insn (gen_extenddfif2 (dest, df_tmp));
3411 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3412 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
3419 ;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
3420 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
3421 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3422 ;; vector short or vector unsigned short.
;; Signed variant: ISA 3.0 extract into a vector scratch, view it as
;; DImode (sign-extended), then convert DI -> float in one step.
3423 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3424 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3426 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3427 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3428 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3429 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3430 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3431 && TARGET_P9_VECTOR"
3433 "&& reload_completed"
3434 [(parallel [(set (match_dup 3)
3435 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3437 (parallel [(match_dup 2)])))
3438 (clobber (scratch:SI))])
3440 (sign_extend:DI (match_dup 3)))
3442 (float:<FL_CONV:MODE> (match_dup 4)))]
;; Operand 4 is the DImode view of the scratch holding the element.
3444 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
;; Unsigned variant of the pattern above: extract the element, then
;; convert the DImode view of the scratch directly to the float type
;; (no sign extension step is needed for the unsigned conversion).
3447 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
3448 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3449 (unsigned_float:FL_CONV
3450 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3451 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3452 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3453 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3454 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3455 && TARGET_P9_VECTOR"
3457 "&& reload_completed"
3458 [(parallel [(set (match_dup 3)
3459 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3461 (parallel [(match_dup 2)])))
3462 (clobber (scratch:SI))])
3464 (float:<FL_CONV:MODE> (match_dup 4)))]
;; Operand 4 is the DImode view of the scratch holding the element.
3466 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3469 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
;; Insert scalar operand 2 into element operand 3 of vector operand 1
;; (operand 0 is tied to operand 1).  Emits XXINSERTW for word elements
;; and VINSERTB/VINSERTH for byte/halfword; the element number is
;; converted to a byte offset, adjusted for endian element ordering.
3470 (define_insn "vsx_set_<mode>_p9"
3471 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3472 (unspec:VSX_EXTRACT_I
3473 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3474 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3475 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3477 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3479 int ele = INTVAL (operands[3]);
3480 int nunits = GET_MODE_NUNITS (<MODE>mode);
;; Adjust the element number for little-endian element ordering.
3482 if (!VECTOR_ELT_ORDER_BIG)
3483 ele = nunits - 1 - ele;
;; Rewrite operand 3 as the byte offset the insert instruction expects.
3485 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3486 if (<MODE>mode == V4SImode)
3487 return "xxinsertw %x0,%x2,%3";
3489 return "vinsert<wd> %0,%2,%3";
3491 [(set_attr "type" "vecperm")])
;; Insert an SF scalar into a V4SF vector on ISA 3.0.  Split into:
;; convert the SF value with XSCVDPSPN (operand 5 = V4SF view of the
;; SI scratch), extract the converted word into the scratch, then
;; XXINSERTW it into the V4SI view of the destination.
3493 (define_insn_and_split "vsx_set_v4sf_p9"
3494 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3496 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3497 (match_operand:SF 2 "gpc_reg_operand" "ww")
3498 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3500 (clobber (match_scratch:SI 4 "=&wJwK"))]
3501 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3503 "&& reload_completed"
3505 (unspec:V4SF [(match_dup 2)]
3506 UNSPEC_VSX_CVDPSPN))
3507 (parallel [(set (match_dup 4)
3508 (vec_select:SI (match_dup 6)
3509 (parallel [(match_dup 7)])))
3510 (clobber (scratch:SI))])
3512 (unspec:V4SI [(match_dup 8)
;; Build the alternate-mode views of the scratch and the destination
;; used by the split sequence above.
3517 unsigned int tmp_regno = reg_or_subregno (operands[4]);
3519 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
3520 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
;; The converted scalar lands in word 1 (BE order) / word 2 (LE order).
3521 operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2);
3522 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3524 [(set_attr "type" "vecperm")
3525 (set_attr "length" "12")])
3527 ;; Special case setting 0.0f to a V4SF element
;; When the inserted value is the constant 0.0f, skip the convert and
;; extract steps and insert directly from an all-zero source.
3528 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
3529 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3531 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3532 (match_operand:SF 2 "zero_fp_constant" "j")
3533 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3535 (clobber (match_scratch:SI 4 "=&wJwK"))]
3536 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3538 "&& reload_completed"
3542 (unspec:V4SI [(match_dup 5)
3547 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3549 [(set_attr "type" "vecperm")
3550 (set_attr "length" "8")])
3552 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
3553 ;; that is in the default scalar position (1 for big endian, 2 for little
3554 ;; endian). We just need to do an xxinsertw since the element is in the
3555 ;; correct location.
3557 (define_insn "*vsx_insert_extract_v4sf_p9"
3558 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3560 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3561 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
3563 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
3564 (match_operand:QI 4 "const_0_to_3_operand" "n")]
3566 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
3567 && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
3569 int ele = INTVAL (operands[4]);
;; Adjust the target element number for little-endian ordering and
;; convert it to the byte offset XXINSERTW expects.
3571 if (!VECTOR_ELT_ORDER_BIG)
3572 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
3574 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
3575 return "xxinsertw %x0,%x2,%4";
3577 [(set_attr "type" "vecperm")])
3579 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
3580 ;; that is in the default scalar position (1 for big endian, 2 for little
3581 ;; endian). Convert the insert/extract to int and avoid doing the conversion.
3583 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
3584 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3586 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3587 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
3589 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
3590 (match_operand:QI 4 "const_0_to_3_operand" "n")]
3592 (clobber (match_scratch:SI 5 "=&wJwK"))]
3593 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
3594 && TARGET_P9_VECTOR && TARGET_POWERPC64
3595 && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
;; Split into an SImode extract into the scratch followed by an insert,
;; both performed on V4SI views of the V4SF operands so no float
;; conversion is ever done.
3598 [(parallel [(set (match_dup 5)
3599 (vec_select:SI (match_dup 6)
3600 (parallel [(match_dup 3)])))
3601 (clobber (scratch:SI))])
3603 (unspec:V4SI [(match_dup 8)
3608 if (GET_CODE (operands[5]) == SCRATCH)
3609 operands[5] = gen_reg_rtx (SImode);
;; V4SI views of the source vector, destination, and insert target.
3611 operands[6] = gen_lowpart (V4SImode, operands[2]);
3612 operands[7] = gen_lowpart (V4SImode, operands[0]);
3613 operands[8] = gen_lowpart (V4SImode, operands[1]);
3615 [(set_attr "type" "vecperm")])
3617 ;; Expanders for builtins
;; vec_mergel: interleave the low halves of two V2DF/V2DI vectors,
;; built from VEC_CONCAT + VEC_SELECT.  The selected element indices
;; and operand order are swapped for LE with -maltivec=be.
3618 (define_expand "vsx_mergel_<mode>"
3619 [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
3620 (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
3621 (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
3622 "VECTOR_MEM_VSX_P (<MODE>mode)"
3627 /* Special handling for LE with -maltivec=be. */
3628 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
3630 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
3631 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
3635 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
3636 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
3639 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
3640 emit_insn (gen_rtx_SET (operands[0], x));
;; vec_mergeh: same scheme as vsx_mergel but selecting the high
;; elements (0 and 2 in BE order).
3644 (define_expand "vsx_mergeh_<mode>"
3645 [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
3646 (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
3647 (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
3648 "VECTOR_MEM_VSX_P (<MODE>mode)"
3653 /* Special handling for LE with -maltivec=be. */
3654 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
3656 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
3657 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
3661 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
3662 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
3665 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
3666 emit_insn (gen_rtx_SET (operands[0], x));
3671 ;; We separate the register splat insn from the memory splat insn to force the
3672 ;; register allocator to generate the indexed form of the SPLAT when it is
3673 ;; given an offsettable memory reference. Otherwise, if the register and
3674 ;; memory insns were combined into a single insn, the register allocator will
3675 ;; load the value into a register, and then do a double word permute.
3676 (define_expand "vsx_splat_<mode>"
3677 [(set (match_operand:VSX_D 0 "vsx_register_operand")
3678 (vec_duplicate:VSX_D
3679 (match_operand:<VS_scalar> 1 "input_operand")))]
3680 "VECTOR_MEM_VSX_P (<MODE>mode)"
;; Legitimize the operand: memory gets an address usable by the splat
;; load; other non-register operands are forced into a register.
3682 rtx op1 = operands[1];
3684 operands[1] = rs6000_address_for_fpconvert (op1);
3685 else if (!REG_P (op1))
3686 op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
;; Splat of a 64-bit scalar already in a register: XXPERMDI with both
;; inputs equal duplicates the doubleword.
3689 (define_insn "vsx_splat_<mode>_reg"
3690 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
3691 (vec_duplicate:VSX_D
3692 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
3693 "VECTOR_MEM_VSX_P (<MODE>mode)"
3695 xxpermdi %x0,%x1,%x1,0
3697 [(set_attr "type" "vecperm")])
;; Splat of a 64-bit scalar loaded from memory.
3699 (define_insn "vsx_splat_<VSX_D:mode>_mem"
3700 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
3701 (vec_duplicate:VSX_D
3702 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
3703 "VECTOR_MEM_VSX_P (<MODE>mode)"
3705 [(set_attr "type" "vecload")])
3707 ;; V4SI splat support
3708 (define_insn "vsx_splat_v4si"
3709 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
3711 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
3716 [(set_attr "type" "vecperm,vecload")])
3718 ;; SImode is not currently allowed in vector registers. This pattern
3719 ;; allows us to use direct move to get the value in a vector register
3720 ;; so that we can use XXSPLTW
3721 (define_insn "vsx_splat_v4si_di"
3722 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
3725 (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
3726 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3730 [(set_attr "type" "vecperm")])
3732 ;; V4SF splat (ISA 3.0)
;; Splat an SF value from memory, a VSX register, or a GPR.  The
;; register alternative splits after reload into XSCVDPSPN followed by
;; an XXSPLTW of word 0.
3733 (define_insn_and_split "vsx_splat_v4sf"
3734 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
3736 (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
3742 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
3744 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
3746 (unspec:V4SF [(match_dup 0)
3747 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
3749 [(set_attr "type" "vecload,vecperm,mftgpr")
3750 (set_attr "length" "4,8,4")])
3752 ;; V4SF/V4SI splat from a vector element
;; XXSPLTW with the element number corrected for little-endian order.
3753 (define_insn "vsx_xxspltw_<mode>"
3754 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3755 (vec_duplicate:VSX_W
3756 (vec_select:<VS_scalar>
3757 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3759 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
3760 "VECTOR_MEM_VSX_P (<MODE>mode)"
3762 if (!BYTES_BIG_ENDIAN)
3763 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
3765 return "xxspltw %x0,%x1,%2";
3767 [(set_attr "type" "vecperm")])
;; "Direct" form of XXSPLTW: the element number is emitted exactly as
;; given, with no endian adjustment (used by splitters that have
;; already computed the hardware element number).
3769 (define_insn "vsx_xxspltw_<mode>_direct"
3770 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3771 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3772 (match_operand:QI 2 "u5bit_cint_operand" "i")]
3773 UNSPEC_VSX_XXSPLTW))]
3774 "VECTOR_MEM_VSX_P (<MODE>mode)"
3775 "xxspltw %x0,%x1,%2"
3776 [(set_attr "type" "vecperm")])
3778 ;; V16QI/V8HI splat support on ISA 2.07
;; Splat the low byte/halfword of a DImode value held in an AltiVec
;; register, via VSPLTB/VSPLTH.
3779 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
3780 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
3781 (vec_duplicate:VSX_SPLAT_I
3782 (truncate:<VS_scalar>
3783 (match_operand:DI 1 "altivec_register_operand" "v"))))]
3784 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3785 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
3786 [(set_attr "type" "vecperm")])
3788 ;; V2DF/V2DI splat for use by vec_splat builtin
;; Duplicate doubleword 0 or 1 with XXPERMDI; permute immediate 0
;; duplicates the high doubleword, 3 the low one.
3789 (define_insn "vsx_xxspltd_<mode>"
3790 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3791 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
3792 (match_operand:QI 2 "u5bit_cint_operand" "i")]
3793 UNSPEC_VSX_XXSPLTD))]
3794 "VECTOR_MEM_VSX_P (<MODE>mode)"
3796 if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
3797 || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
3798 return "xxpermdi %x0,%x1,%x1,0";
3800 return "xxpermdi %x0,%x1,%x1,3";
3802 [(set_attr "type" "vecperm")])
3804 ;; V4SF/V4SI interleave
;; Merge-high of two word vectors.  On little-endian the operands are
;; swapped and the opposite merge instruction is used so the RTL
;; element numbering stays consistent.
3805 (define_insn "vsx_xxmrghw_<mode>"
3806 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
3808 (vec_concat:<VS_double>
3809 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
3810 (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
3811 (parallel [(const_int 0) (const_int 4)
3812 (const_int 1) (const_int 5)])))]
3813 "VECTOR_MEM_VSX_P (<MODE>mode)"
3815 if (BYTES_BIG_ENDIAN)
3816 return "xxmrghw %x0,%x1,%x2";
3818 return "xxmrglw %x0,%x2,%x1";
3820 [(set_attr "type" "vecperm")])
;; Merge-low counterpart: selects elements 2,6,3,7 of the concatenation.
3822 (define_insn "vsx_xxmrglw_<mode>"
3823 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
3825 (vec_concat:<VS_double>
3826 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
3827 (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
3828 (parallel [(const_int 2) (const_int 6)
3829 (const_int 3) (const_int 7)])))]
3830 "VECTOR_MEM_VSX_P (<MODE>mode)"
3832 if (BYTES_BIG_ENDIAN)
3833 return "xxmrglw %x0,%x1,%x2";
3835 return "xxmrghw %x0,%x2,%x1";
3837 [(set_attr "type" "vecperm")])
3839 ;; Shift left double by word immediate
3840 (define_insn "vsx_xxsldwi_<mode>"
3841 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
3842 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
3843 (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
3844 (match_operand:QI 3 "u5bit_cint_operand" "i")]
3846 "VECTOR_MEM_VSX_P (<MODE>mode)"
3847 "xxsldwi %x0,%x1,%x2,%3"
3848 [(set_attr "type" "vecperm")])
3851 ;; Vector reduction insns and splitters
;; Horizontal reduction of a V2DF vector: XXSLDWI rotates the two
;; doublewords, then one vector add/min/max (<VEC_reduc_rtx>) combines
;; them.  Operand 2 is a scratch; "0" alternatives reuse the output.
3853 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
3854 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
3858 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
3859 (parallel [(const_int 1)]))
3862 (parallel [(const_int 0)])))
3864 (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
3865 "VECTOR_UNIT_VSX_P (V2DFmode)"
3871 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
3872 ? gen_reg_rtx (V2DFmode)
3874 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
3875 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
3878 [(set_attr "length" "8")
3879 (set_attr "type" "veccomplex")])
;; Horizontal reduction of a V4SF vector: two XXSLDWI/op rounds fold
;; the four words down to one value replicated across the vector.
3881 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
3882 [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
3884 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
3885 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
3886 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
3887 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
3888 "VECTOR_UNIT_VSX_P (V4SFmode)"
3894 rtx op0 = operands[0];
3895 rtx op1 = operands[1];
3896 rtx tmp2, tmp3, tmp4;
;; Before reload use fresh pseudos for the intermediate values.
3898 if (can_create_pseudo_p ())
3900 tmp2 = gen_reg_rtx (V4SFmode);
3901 tmp3 = gen_reg_rtx (V4SFmode);
3902 tmp4 = gen_reg_rtx (V4SFmode);
3911 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
3912 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
3913 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
3914 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
3917 [(set_attr "length" "16")
3918 (set_attr "type" "veccomplex")])
3920 ;; Combiner patterns with the vector reduction patterns that knows we can get
3921 ;; to the top element of the V2DF array without doing an extract.
;; Same V2DF reduction but producing the scalar DF result directly:
;; the high element is read with gen_highpart, the low element is
;; extracted, and a scalar op combines them.
3923 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
3924 [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
3929 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
3930 (parallel [(const_int 1)]))
3933 (parallel [(const_int 0)])))
3935 (parallel [(const_int 1)])))
3936 (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
3937 "VECTOR_UNIT_VSX_P (V2DFmode)"
3943 rtx hi = gen_highpart (DFmode, operands[1]);
3944 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
3945 ? gen_reg_rtx (DFmode)
3948 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
3949 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
3952 [(set_attr "length" "8")
3953 (set_attr "type" "veccomplex")])
;; V4SF reduction returning the scalar SF result: the vector reduction
;; sequence followed by XSCVSPDP to convert element 3 to scalar form.
3955 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
3956 [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
3959 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
3960 (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
3961 (parallel [(const_int 3)])))
3962 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
3963 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
3964 (clobber (match_scratch:V4SF 4 "=0,0"))]
3965 "VECTOR_UNIT_VSX_P (V4SFmode)"
3971 rtx op0 = operands[0];
3972 rtx op1 = operands[1];
3973 rtx tmp2, tmp3, tmp4, tmp5;
;; Before reload use fresh pseudos for the intermediate values.
3975 if (can_create_pseudo_p ())
3977 tmp2 = gen_reg_rtx (V4SFmode);
3978 tmp3 = gen_reg_rtx (V4SFmode);
3979 tmp4 = gen_reg_rtx (V4SFmode);
3980 tmp5 = gen_reg_rtx (V4SFmode);
3990 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
3991 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
3992 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
3993 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
3994 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
3997 [(set_attr "length" "20")
3998 (set_attr "type" "veccomplex")])
4001 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
;; Fuse "li reg,imm" with an indexed vector load (LXVD2X/LXVW4X via
;; lx<VSX_M:VSm>x) into a single two-instruction group.  The two
;; variants below differ only in which addend of the PLUS is the
;; just-loaded base register.
4003 [(set (match_operand:P 0 "base_reg_operand" "")
4004 (match_operand:P 1 "short_cint_operand" ""))
4005 (set (match_operand:VSX_M 2 "vsx_register_operand" "")
4006 (mem:VSX_M (plus:P (match_dup 0)
4007 (match_operand:P 3 "int_reg_operand" ""))))]
4008 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4009 "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
4010 [(set_attr "length" "8")
4011 (set_attr "type" "vecload")])
;; Variant with the register operand first in the PLUS.
4014 [(set (match_operand:P 0 "base_reg_operand" "")
4015 (match_operand:P 1 "short_cint_operand" ""))
4016 (set (match_operand:VSX_M 2 "vsx_register_operand" "")
4017 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand" "")
4019 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4020 "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
4021 [(set_attr "length" "8")
4022 (set_attr "type" "vecload")])
4025 ;; ISA 3.0 vector extend sign support
;; Sign-extend each byte element into a V2DI/V4SI result (VEXTSB2D /
;; VEXTSB2W family, via UNSPEC_VSX_SIGN_EXTEND).
4027 (define_insn "vsx_sign_extend_qi_<mode>"
4028 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4030 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4031 UNSPEC_VSX_SIGN_EXTEND))]
4034 [(set_attr "type" "vecexts")])
;; Sign-extend each halfword element into a V2DI/V4SI result.
4036 (define_insn "vsx_sign_extend_hi_<mode>"
4037 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4039 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4040 UNSPEC_VSX_SIGN_EXTEND))]
4043 [(set_attr "type" "vecexts")])
;; Sign-extend each word element of a V4SI into a V2DI result.
4045 (define_insn "*vsx_sign_extend_si_v2di"
4046 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4047 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4048 UNSPEC_VSX_SIGN_EXTEND))]
4051 [(set_attr "type" "vecexts")])
4054 ;; ISA 3.0 Binary Floating-Point Support
4056 ;; VSX Scalar Extract Exponent Quad-Precision
4057 (define_insn "xsxexpqp"
4058 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4059 (unspec:DI [(match_operand:KF 1 "altivec_register_operand" "v")]
4060 UNSPEC_VSX_SXEXPDP))]
4063 [(set_attr "type" "vecmove")])
4065 ;; VSX Scalar Extract Exponent Double-Precision
;; Result goes to a GPR, so this requires a 64-bit target.
4066 (define_insn "xsxexpdp"
4067 [(set (match_operand:DI 0 "register_operand" "=r")
4068 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4069 UNSPEC_VSX_SXEXPDP))]
4070 "TARGET_P9_VECTOR && TARGET_64BIT"
4072 [(set_attr "type" "integer")])
4074 ;; VSX Scalar Extract Significand Quad-Precision
4075 (define_insn "xsxsigqp"
4076 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4077 (unspec:TI [(match_operand:KF 1 "altivec_register_operand" "v")]
4081 [(set_attr "type" "vecmove")])
4083 ;; VSX Scalar Extract Significand Double-Precision
4084 (define_insn "xsxsigdp"
4085 [(set (match_operand:DI 0 "register_operand" "=r")
4086 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4088 "TARGET_P9_VECTOR && TARGET_64BIT"
4090 [(set_attr "type" "integer")])
4092 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
;; Operand 1 supplies the significand (as a KF value), operand 2 the
;; new exponent.
4093 (define_insn "xsiexpqpf"
4094 [(set (match_operand:KF 0 "altivec_register_operand" "=v")
4095 (unspec:KF [(match_operand:KF 1 "altivec_register_operand" "v")
4096 (match_operand:DI 2 "altivec_register_operand" "v")]
4097 UNSPEC_VSX_SIEXPQP))]
4100 [(set_attr "type" "vecmove")])
4102 ;; VSX Scalar Insert Exponent Quad-Precision
;; Same as above but the significand is given as a raw TImode value.
4103 (define_insn "xsiexpqp"
4104 [(set (match_operand:KF 0 "altivec_register_operand" "=v")
4105 (unspec:KF [(match_operand:TI 1 "altivec_register_operand" "v")
4106 (match_operand:DI 2 "altivec_register_operand" "v")]
4107 UNSPEC_VSX_SIEXPQP))]
4110 [(set_attr "type" "vecmove")])
4112 ;; VSX Scalar Insert Exponent Double-Precision
;; Both inputs come from GPRs (raw DImode significand and exponent).
4113 (define_insn "xsiexpdp"
4114 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4115 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4116 (match_operand:DI 2 "register_operand" "r")]
4117 UNSPEC_VSX_SIEXPDP))]
4118 "TARGET_P9_VECTOR && TARGET_64BIT"
4119 "xsiexpdp %x0,%1,%2"
4120 [(set_attr "type" "fpsimple")])
4122 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
;; Variant taking the significand as a DFmode value; emits the same
;; XSIEXPDP instruction.
4123 (define_insn "xsiexpdpf"
4124 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4125 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4126 (match_operand:DI 2 "register_operand" "r")]
4127 UNSPEC_VSX_SIEXPDP))]
4128 "TARGET_P9_VECTOR && TARGET_64BIT"
4129 "xsiexpdp %x0,%1,%2"
4130 [(set_attr "type" "fpsimple")])
4132 ;; VSX Scalar Compare Exponents Double-Precision
;; Expander: do the exponent compare into a fresh CCFP register
;; (operand 3) and materialize the requested condition (eq/lt/gt/
;; unordered, from the CMP_TEST code iterator) as an SImode 0/1.
4133 (define_expand "xscmpexpdp_<code>"
4137 [(match_operand:DF 1 "vsx_register_operand" "wa")
4138 (match_operand:DF 2 "vsx_register_operand" "wa")]
4139 UNSPEC_VSX_SCMPEXPDP)
4141 (set (match_operand:SI 0 "register_operand" "=r")
4142 (CMP_TEST:SI (match_dup 3)
4146 operands[3] = gen_reg_rtx (CCFPmode);
;; The insn that actually sets the CR field with XSCMPEXPDP.
4149 (define_insn "*xscmpexpdp"
4150 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4152 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4153 (match_operand:DF 2 "vsx_register_operand" "wa")]
4154 UNSPEC_VSX_SCMPEXPDP)
4155 (match_operand:SI 3 "zero_constant" "j")))]
4157 "xscmpexpdp %0,%x1,%x2"
4158 [(set_attr "type" "fpcompare")])
4160 ;; VSX Scalar Test Data Class Quad-Precision
4161 ;; (Expansion for scalar_test_data_class (__ieee128, int))
4162 ;; (Has side effect of setting the lt bit if operand 1 is negative,
4163 ;; setting the eq bit if any of the conditions tested by operand 2
4164 ;; are satisfied, and clearing the gt and unordered bits to zero.)
4165 (define_expand "xststdcqp"
4169 [(match_operand:KF 1 "altivec_register_operand" "v")
4170 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4173 (set (match_operand:SI 0 "register_operand" "=r")
4174 (eq:SI (match_dup 3)
4178 operands[3] = gen_reg_rtx (CCFPmode);
4181 ;; VSX Scalar Test Data Class Double- and Single-Precision
4182 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
4183 ;; if any of the conditions tested by operand 2 are satisfied.
4184 ;; The gt and unordered bits are cleared to zero.)
4185 (define_expand "xststdc<Fvsx>"
4189 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4190 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4193 (set (match_operand:SI 0 "register_operand" "=r")
4194 (eq:SI (match_dup 3)
4198 operands[3] = gen_reg_rtx (CCFPmode);
4199 operands[4] = CONST0_RTX (SImode);
4202 ;; The VSX Scalar Test Negative Quad-Precision
;; Tests only the "negative" (lt) bit of the data-class result.
4203 (define_expand "xststdcnegqp"
4207 [(match_operand:KF 1 "altivec_register_operand" "v")
4211 (set (match_operand:SI 0 "register_operand" "=r")
4212 (lt:SI (match_dup 2)
4216 operands[2] = gen_reg_rtx (CCFPmode);
4219 ;; The VSX Scalar Test Negative Double- and Single-Precision
4220 (define_expand "xststdcneg<Fvsx>"
4224 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4228 (set (match_operand:SI 0 "register_operand" "=r")
4229 (lt:SI (match_dup 2)
4233 operands[2] = gen_reg_rtx (CCFPmode);
4234 operands[3] = CONST0_RTX (SImode);
;; The insns the expanders above generate: XSTSTDCQP / XSTSTDCDP /
;; XSTSTDCSP setting a CR field from the data-class test.
4237 (define_insn "*xststdcqp"
4238 [(set (match_operand:CCFP 0 "" "=y")
4240 (unspec:KF [(match_operand:KF 1 "altivec_register_operand" "v")
4241 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4245 "xststdcqp %0,%1,%2"
4246 [(set_attr "type" "fpcompare")])
4248 (define_insn "*xststdc<Fvsx>"
4249 [(set (match_operand:CCFP 0 "" "=y")
4251 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4252 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4254 (match_operand:SI 3 "zero_constant" "j")))]
4256 "xststdc<Fvsx> %0,%x1,%2"
4257 [(set_attr "type" "fpcompare")])
4259 ;; VSX Vector Extract Exponent Double and Single Precision
4260 (define_insn "xvxexp<VSs>"
4261 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4263 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4266 "xvxexp<VSs> %x0,%x1"
4267 [(set_attr "type" "vecsimple")])
4269 ;; VSX Vector Extract Significand Double and Single Precision
4270 (define_insn "xvxsig<VSs>"
4271 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4273 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4276 "xvxsig<VSs> %x0,%x1"
4277 [(set_attr "type" "vecsimple")])
4279 ;; VSX Vector Insert Exponent Double and Single Precision
;; Binary: combines the two VSX_F inputs per element; per the mnemonic this
;; inserts an exponent field taken from operand 2 into operand 1's elements.
4280 (define_insn "xviexp<VSs>"
4281 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4283 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4284 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4287 "xviexp<VSs> %x0,%x1,%x2"
4288 [(set_attr "type" "vecsimple")])
4290 ;; VSX Vector Test Data Class Double and Single Precision
4291 ;; The corresponding elements of the result vector are all ones
4292 ;; if any of the conditions tested by operand 3 are satisfied.
;; Result is the matching integer vector mode (<VSI>); operand 2 is the
;; 7-bit data-class mask immediate printed literally (%2, not %x2).
4293 (define_insn "xvtstdc<VSs>"
4294 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4296 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4297 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4298 UNSPEC_VSX_VTSTDC))]
4300 "xvtstdc<VSs> %x0,%x1,%2"
4301 [(set_attr "type" "vecsimple")])
4303 ;; ISA 3.0 String Operations Support
4305 ;; Compare vectors producing a vector result and a predicate, setting CR6
4306 ;; to indicate a combined status. This pattern matches v16qi, v8hi, and
4307 ;; v4si modes. It does not match v2df, v4sf, or v2di modes. There's no
4308 ;; need to match v4sf, v2df, or v2di modes because those are expanded
4309 ;; to use Power8 instructions.
;; The "." (record) form of vcmpne: one instruction both writes the
;; element-wise NE mask to operand 0 and sets CR6 with the combined status.
4310 (define_insn "*vsx_ne_<mode>_p"
4311 [(set (reg:CC CR6_REGNO)
4313 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4314 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4316 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4317 (ne:VSX_EXTRACT_I (match_dup 1)
4320 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4321 [(set_attr "type" "vecsimple")])
;; Record form of vcmpnez ("not equal or zero"): like *vsx_ne_<mode>_p but
;; an element also matches when either input element is zero.  Writes the
;; mask to operand 0 and the combined predicate status to CR6.
4323 (define_insn "*vector_nez_<mode>_p"
4324 [(set (reg:CC CR6_REGNO)
4325 (unspec:CC [(unspec:VI
4326 [(match_operand:VI 1 "gpc_reg_operand" "v")
4327 (match_operand:VI 2 "gpc_reg_operand" "v")]
4330 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4331 (unspec:VI [(match_dup 1)
4335 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4336 [(set_attr "type" "vecsimple")])
4338 ;; Load VSX Vector with Length
;; Expander: load up to 16 bytes from the address in operand 1 into the
;; V16QI register operand 0; operand 2 is the byte length in a GPR.
;; Operand 3 is a DImode scratch created below (the insn pattern shifts the
;; length into bits 0:7 as lxvl requires).  64-bit + Power9 only.
4339 (define_expand "lxvl"
4341 (match_operand:DI 2 "register_operand"))
4342 (set (match_operand:V16QI 0 "vsx_register_operand")
4344 [(match_operand:DI 1 "gpc_reg_operand")
4347 "TARGET_P9_VECTOR && TARGET_64BIT"
4349 operands[3] = gen_reg_rtx (DImode);
;; Matcher for lxvl: emits two instructions, shifting the byte count left by
;; 56 so it sits in the high-order byte that lxvl reads.  Operand 2 uses the
;; "+r" constraint because the sldi modifies the length register in place.
4352 (define_insn "*lxvl"
4353 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4355 [(match_operand:DI 1 "gpc_reg_operand" "b")
4356 (match_operand:DI 2 "register_operand" "+r")]
4358 "TARGET_P9_VECTOR && TARGET_64BIT"
4359 "sldi %2,%2, 56\; lxvl %x0,%1,%2"
4360 [(set_attr "length" "8")
4361 (set_attr "type" "vecload")])
;; Load VSX Vector Left-justified with Length.  Unlike *lxvl, the length in
;; operand 2 is used as-is (callers such as xl_len_r pre-shift it by 56).
4363 (define_insn "lxvll"
4364 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4365 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
4366 (match_operand:DI 2 "register_operand" "r")]
4370 [(set_attr "type" "vecload")])
4372 ;; Expand for builtin xl_len_r
;; Load 'length' bytes from address operand 1 (operand 2 = byte count):
;; shift the count into the top byte, do a left-justified lxvll, then use
;; lvsl to build a permute mask and vperm to rotate the loaded bytes into
;; the element order the builtin promises.
4373 (define_expand "xl_len_r"
4374 [(match_operand:V16QI 0 "vsx_register_operand")
4375 (match_operand:DI 1 "register_operand")
4376 (match_operand:DI 2 "register_operand")]
4379 rtx shift_mask = gen_reg_rtx (V16QImode);
4380 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
4381 rtx tmp = gen_reg_rtx (DImode);
4383 emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
4384 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
4385 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
4386 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
;; Store VSX Vector Left-justified with Length: store up to 16 bytes of
;; operand 0 to the address in operand 1; operand 2 holds the (pre-shifted)
;; byte count.  Counterpart of lxvll above.
4391 (define_insn "stxvll"
4392 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
4393 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
4394 (match_operand:DI 2 "register_operand" "r")]
4398 [(set_attr "type" "vecstore")])
4400 ;; Store VSX Vector with Length
;; Expander mirroring lxvl: store up to 16 bytes of operand 0 to the address
;; in operand 1, byte count in operand 2.  Operand 3 is a DImode scratch.
4401 (define_expand "stxvl"
4403 (match_operand:DI 2 "register_operand"))
4404 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
4406 [(match_operand:V16QI 0 "vsx_register_operand")
4409 "TARGET_P9_VECTOR && TARGET_64BIT"
4411 operands[3] = gen_reg_rtx (DImode);
;; Matcher for stxvl: shifts the byte count into the high-order byte with
;; sldi (hence "+r" on operand 2 — it is clobbered), then issues stxvl.
4414 (define_insn "*stxvl"
4415 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
4417 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
4418 (match_operand:DI 2 "register_operand" "+r")]
4420 "TARGET_P9_VECTOR && TARGET_64BIT"
4421 "sldi %2,%2,56\;stxvl %x0,%1,%2"
4422 [(set_attr "length" "8")
4423 (set_attr "type" "vecstore")])
4425 ;; Expand for builtin xst_len_r
;; Store counterpart of xl_len_r: build a permute mask with lvsr, rotate the
;; source bytes with vperm into left-justified order, shift the byte count
;; into the top byte, then store with stxvll.
4426 (define_expand "xst_len_r"
4427 [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
4428 (match_operand:DI 1 "register_operand" "b")
4429 (match_operand:DI 2 "register_operand" "r")]
4432 rtx shift_mask = gen_reg_rtx (V16QImode);
4433 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
4434 rtx tmp = gen_reg_rtx (DImode);
4436 emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
4437 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
4439 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
4440 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
;; ISA 3.0 element-wise "not equal" / "not equal or zero" compares for byte,
;; halfword, and word elements.  Each writes an all-ones/all-zeros mask per
;; element; the "z" variants also match when either input element is zero
;; (used for string/null-terminator scanning).  Non-record forms: CR6 is not
;; set here (see *vsx_ne_<mode>_p / *vector_nez_<mode>_p above for that).
4444 ;; Vector Compare Not Equal Byte
4445 (define_insn "vcmpneb"
4446 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
4447 (unspec:V16QI [(match_operand:V16QI 1 "altivec_register_operand" "v")
4448 (match_operand:V16QI 2 "altivec_register_operand" "v")]
4452 [(set_attr "type" "vecsimple")])
4454 ;; Vector Compare Not Equal or Zero Byte
4455 (define_insn "vcmpnezb"
4456 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
4458 [(match_operand:V16QI 1 "altivec_register_operand" "v")
4459 (match_operand:V16QI 2 "altivec_register_operand" "v")]
4463 [(set_attr "type" "vecsimple")])
4465 ;; Vector Compare Not Equal Half Word
4466 (define_insn "vcmpneh"
4467 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
4468 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
4469 (match_operand:V8HI 2 "altivec_register_operand" "v")]
4473 [(set_attr "type" "vecsimple")])
4475 ;; Vector Compare Not Equal or Zero Half Word
4476 (define_insn "vcmpnezh"
4477 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
4478 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
4479 (match_operand:V8HI 2 "altivec_register_operand" "v")]
4483 [(set_attr "type" "vecsimple")])
4485 ;; Vector Compare Not Equal Word
4486 (define_insn "vcmpnew"
4487 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
4489 [(match_operand:V4SI 1 "altivec_register_operand" "v")
4490 (match_operand:V4SI 2 "altivec_register_operand" "v")]
4494 [(set_attr "type" "vecsimple")])
4496 ;; Vector Compare Not Equal or Zero Word
4497 (define_insn "vcmpnezw"
4498 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
4499 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
4500 (match_operand:V4SI 2 "altivec_register_operand" "v")]
4504 [(set_attr "type" "vecsimple")])
;; Count the leading/trailing byte elements of a V16QI vector whose
;; least-significant bit is zero; the count is returned in a GPR (SImode).
4506 ;; Vector Count Leading Zero Least-Significant Bits Byte
4507 (define_insn "vclzlsbb"
4508 [(set (match_operand:SI 0 "register_operand" "=r")
4510 [(match_operand:V16QI 1 "altivec_register_operand" "v")]
4514 [(set_attr "type" "vecsimple")])
4516 ;; Vector Count Trailing Zero Least-Significant Bits Byte
4517 (define_insn "vctzlsbb"
4518 [(set (match_operand:SI 0 "register_operand" "=r")
4520 [(match_operand:V16QI 1 "altivec_register_operand" "v")]
4524 [(set_attr "type" "vecsimple")])
;; ISA 3.0 variable-index element extracts: pull an unsigned byte/halfword/
;; word out of a vector at the byte index given in GPR operand 1, returning
;; it zero-extended in GPR operand 0.  The "lx" forms index from the left
;; (big-endian element order), the "rx" forms from the right.
4526 ;; Vector Extract Unsigned Byte Left-Indexed
4527 (define_insn "vextublx"
4528 [(set (match_operand:SI 0 "register_operand" "=r")
4530 [(match_operand:SI 1 "register_operand" "r")
4531 (match_operand:V16QI 2 "altivec_register_operand" "v")]
4535 [(set_attr "type" "vecsimple")])
4537 ;; Vector Extract Unsigned Byte Right-Indexed
4538 (define_insn "vextubrx"
4539 [(set (match_operand:SI 0 "register_operand" "=r")
4541 [(match_operand:SI 1 "register_operand" "r")
4542 (match_operand:V16QI 2 "altivec_register_operand" "v")]
4546 [(set_attr "type" "vecsimple")])
4548 ;; Vector Extract Unsigned Half Word Left-Indexed
4549 (define_insn "vextuhlx"
4550 [(set (match_operand:SI 0 "register_operand" "=r")
4552 [(match_operand:SI 1 "register_operand" "r")
4553 (match_operand:V8HI 2 "altivec_register_operand" "v")]
4557 [(set_attr "type" "vecsimple")])
4559 ;; Vector Extract Unsigned Half Word Right-Indexed
4560 (define_insn "vextuhrx"
4561 [(set (match_operand:SI 0 "register_operand" "=r")
4563 [(match_operand:SI 1 "register_operand" "r")
4564 (match_operand:V8HI 2 "altivec_register_operand" "v")]
4568 [(set_attr "type" "vecsimple")])
4570 ;; Vector Extract Unsigned Word Left-Indexed
4571 (define_insn "vextuwlx"
4572 [(set (match_operand:SI 0 "register_operand" "=r")
4574 [(match_operand:SI 1 "register_operand" "r")
4575 (match_operand:V4SI 2 "altivec_register_operand" "v")]
4579 [(set_attr "type" "vecsimple")])
4581 ;; Vector Extract Unsigned Word Right-Indexed
4582 (define_insn "vextuwrx"
4583 [(set (match_operand:SI 0 "register_operand" "=r")
4585 [(match_operand:SI 1 "register_operand" "r")
4586 (match_operand:V4SI 2 "altivec_register_operand" "v")]
4590 [(set_attr "type" "vecsimple")])
4592 ;; Vector insert/extract word at arbitrary byte values. Note, the little
4593 ;; endian version needs to adjust the byte number, and the V4SI element in
;; Extract a 32-bit word from the V16QI vector at constant byte offset
;; operand 2 (0..12), returning it in DImode operand 0.  On little endian
;; the offset is mirrored (12 - n) to match the instruction's BE numbering.
4595 (define_expand "vextract4b"
4596 [(set (match_operand:DI 0 "gpc_reg_operand")
4597 (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand")
4598 (match_operand:QI 2 "const_0_to_12_operand")]
4599 UNSPEC_XXEXTRACTUW))]
4602 if (!VECTOR_ELT_ORDER_BIG)
4603 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
;; Two alternatives: a VSX-register destination uses xxextractuw directly;
;; a GPR destination is split after reload into "move the byte offset into
;; the GPR, then vextuwlx (BE) / vextuwrx (LE) indexed word extract".
4606 (define_insn_and_split "*vextract4b_internal"
4607 [(set (match_operand:DI 0 "gpc_reg_operand" "=wj,r")
4608 (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand" "wa,v")
4609 (match_operand:QI 2 "const_0_to_12_operand" "n,n")]
4610 UNSPEC_XXEXTRACTUW))]
4613 xxextractuw %x0,%x1,%2
4615 "&& reload_completed && int_reg_operand (operands[0], DImode)"
4618 rtx op0 = operands[0];
4619 rtx op1 = operands[1];
4620 rtx op2 = operands[2];
;; SImode/V4SImode views of the hard registers, needed by gen_vextuw[lr]x.
4621 rtx op0_si = gen_rtx_REG (SImode, REGNO (op0));
4622 rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (op1));
;; Load the constant byte offset into the destination GPR, which doubles
;; as the index register for the variable-index extract.
4624 emit_move_insn (op0, op2);
4625 if (VECTOR_ELT_ORDER_BIG)
4626 emit_insn (gen_vextuwlx (op0_si, op0_si, op1_v4si));
4628 emit_insn (gen_vextuwrx (op0_si, op0_si, op1_v4si));
4631 [(set_attr "type" "vecperm")])
;; Insert the V4SI word operand 1 into V16QI operand 2 at constant byte
;; offset operand 3 (0..12).  On little endian the source's doublewords are
;; first swapped with xxpermdi and the offset mirrored (12 - n) so that the
;; underlying xxinsertw, which numbers bytes big-endian, picks the right word.
4633 (define_expand "vinsert4b"
4634 [(set (match_operand:V16QI 0 "vsx_register_operand")
4635 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
4636 (match_operand:V16QI 2 "vsx_register_operand")
4637 (match_operand:QI 3 "const_0_to_12_operand")]
4641 if (!VECTOR_ELT_ORDER_BIG)
4643 rtx op1 = operands[1];
4644 rtx v4si_tmp = gen_reg_rtx (V4SImode);
4645 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
4646 operands[1] = v4si_tmp;
4647 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
;; Matcher for vinsert4b.  Operand 2 is tied to the output (constraint "0")
;; because xxinsertw modifies the target vector in place.
4651 (define_insn "*vinsert4b_internal"
4652 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4653 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
4654 (match_operand:V16QI 2 "vsx_register_operand" "0")
4655 (match_operand:QI 3 "const_0_to_12_operand" "n")]
4658 "xxinsertw %x0,%x1,%3"
4659 [(set_attr "type" "vecperm")])
;; Like vinsert4b but the word to insert arrives in a DImode VSX register
;; (operand 1).  Only the byte offset needs the little-endian mirror here.
4661 (define_expand "vinsert4b_di"
4662 [(set (match_operand:V16QI 0 "vsx_register_operand")
4663 (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand")
4664 (match_operand:V16QI 2 "vsx_register_operand")
4665 (match_operand:QI 3 "const_0_to_12_operand")]
4669 if (!VECTOR_ELT_ORDER_BIG)
4670 operands[3] = GEN_INT (12 - INTVAL (operands[3]))
;; Matcher for vinsert4b_di; as with *vinsert4b_internal, operand 2 is tied
;; to the output because xxinsertw updates the target vector in place.
4673 (define_insn "*vinsert4b_di_internal"
4674 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4675 (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand" "wj")
4676 (match_operand:V16QI 2 "vsx_register_operand" "0")
4677 (match_operand:QI 3 "const_0_to_12_operand" "n")]
4680 "xxinsertw %x0,%x1,%3"
4681 [(set_attr "type" "vecperm")])
4683 ;; Generate vector extract four float 32 values from left four elements
4684 ;; of eight element vector of float 16 values.
;; Strategy: build a constant V16QI permute mask, vperm the four F16 source
;; halfwords into odd halfword slots 1,3,5,7 (where xvcvhpsp expects them),
;; then convert with xvcvhpsp.
4685 (define_expand "vextract_fp_from_shorth"
4686 [(set (match_operand:V4SF 0 "register_operand" "=wa")
4687 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
4688 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
;; Byte-selector table placing source halfwords 0..3 into the odd slots.
4691 int vals[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
4695 rtx mask = gen_reg_rtx (V16QImode);
4696 rtx tmp = gen_reg_rtx (V16QImode);
4699 for (i = 0; i < 16; i++)
4700 rvals[i] = GEN_INT (vals[i]);
4702 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
4703 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
4704 src half words 0,1,2,3 for the conversion instruction. */
4705 v = gen_rtvec_v (16, rvals);
4706 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
4707 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
4708 operands[1], mask));
4709 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
4713 ;; Generate vector extract four float 32 values from right four elements
4714 ;; of eight element vector of float 16 values.
;; Same strategy as vextract_fp_from_shorth, but the permute mask selects
;; source halfwords 4..7 instead of 0..3 before converting with xvcvhpsp.
4715 (define_expand "vextract_fp_from_shortl"
4716 [(set (match_operand:V4SF 0 "register_operand" "=wa")
4717 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
4718 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
;; Byte-selector table placing source halfwords 4..7 into the odd slots.
4721 int vals[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
4724 rtx mask = gen_reg_rtx (V16QImode);
4725 rtx tmp = gen_reg_rtx (V16QImode);
4728 for (i = 0; i < 16; i++)
4729 rvals[i] = GEN_INT (vals[i]);
4731 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
4732 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
4733 src half words 4,5,6,7 for the conversion instruction. */
4734 v = gen_rtvec_v (16, rvals);
4735 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
4736 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
4737 operands[1], mask));
4738 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
4742 ;; Support for ISA 3.0 vector byte reverse
4744 ;; Swap all bytes with in a vector
;; Full 128-bit byte reverse, expressed as a V1TI bswap.
4745 (define_insn "p9_xxbrq_v1ti"
4746 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
4747 (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
4750 [(set_attr "type" "vecperm")])
;; V16QI wrapper for the 128-bit byte reverse: view both operands as V1TI
;; (gen_lowpart) and reuse p9_xxbrq_v1ti above.
4752 (define_expand "p9_xxbrq_v16qi"
4753 [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
4754 (use (match_operand:V16QI 1 "vsx_register_operand" "=wa"))]
4757 rtx op0 = gen_lowpart (V1TImode, operands[0]);
4758 rtx op1 = gen_lowpart (V1TImode, operands[1]);
4759 emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
4763 ;; Swap all bytes in each 64-bit element
;; VSX_D iterator covers V2DF and V2DI.
4764 (define_insn "p9_xxbrd_<mode>"
4765 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4766 (bswap:VSX_D (match_operand:VSX_D 1 "vsx_register_operand" "wa")))]
4769 [(set_attr "type" "vecperm")])
4771 ;; Swap all bytes in each 32-bit element
;; VSX_W iterator covers V4SF and V4SI.
4772 (define_insn "p9_xxbrw_<mode>"
4773 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4774 (bswap:VSX_W (match_operand:VSX_W 1 "vsx_register_operand" "wa")))]
4777 [(set_attr "type" "vecperm")])
4779 ;; Swap all bytes in each 16-bit element
4780 (define_insn "p9_xxbrh_v8hi"
4781 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
4782 (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
4785 [(set_attr "type" "vecperm")])
4788 ;; Operand numbers for the following peephole2
4790 [(SFBOOL_TMP_GPR 0) ;; GPR temporary
4791 (SFBOOL_TMP_VSX 1) ;; vector temporary
4792 (SFBOOL_MFVSR_D 2) ;; move to gpr dest
4793 (SFBOOL_MFVSR_A 3) ;; move to gpr src
4794 (SFBOOL_BOOL_D 4) ;; and/ior/xor dest
4795 (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1
4796 (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg2
4797 (SFBOOL_SHL_D 7) ;; shift left dest
4798 (SFBOOL_SHL_A 8) ;; shift left arg
4799 (SFBOOL_MTVSR_D 9) ;; move to vector dest
4800 (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode
4801 (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode
4802 (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode
4803 (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSR_D as V4SFmode
4805 ;; Attempt to optimize some common GLIBC operations using logical operations to
4806 ;; pick apart SFmode operations. For example, there is code from e_powf.c
4807 ;; after macro expansion that looks like:
4812 ;; } ieee_float_shape_type;
4818 ;; ieee_float_shape_type gf_u;
4819 ;; gf_u.value = (t1);
4820 ;; (is) = gf_u.word;
4824 ;; ieee_float_shape_type sf_u;
4825 ;; sf_u.word = (is & 0xfffff000);
4826 ;; (t1) = sf_u.value;
4830 ;; This would result in two direct move operations (convert to memory format,
4831 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
4832 ;; scalar format). With this peephole, we eliminate the direct move to the
4833 ;; GPR, and instead move the integer mask value to the vector register after a
4834 ;; shift and do the VSX logical operation.
4836 ;; The insns for dealing with SFmode in GPR registers looks like:
4837 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
4839 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
4841 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
4843 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
4845 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
4847 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
4850 [(match_scratch:DI SFBOOL_TMP_GPR "r")
4851 (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
4853 ;; MFVSRWZ (aka zero_extend)
4854 (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
4856 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
4858 ;; AND/IOR/XOR operation on int
4859 (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
4860 (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
4861 (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
4864 (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
4865 (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
4869 (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
4870 (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
4872 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
4873 /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO
4874 to compare registers, when the mode is different. */
4875 && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
4876 && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
4877 && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
4878 && (REG_P (operands[SFBOOL_BOOL_A2])
4879 || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
4880 && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
4881 || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
4882 && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
4883 || (REG_P (operands[SFBOOL_BOOL_A2])
4884 && REGNO (operands[SFBOOL_MFVSR_D])
4885 == REGNO (operands[SFBOOL_BOOL_A2])))
4886 && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
4887 && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
4888 || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
4889 && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
4890 [(set (match_dup SFBOOL_TMP_GPR)
4891 (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
4894 (set (match_dup SFBOOL_TMP_VSX_DI)
4895 (match_dup SFBOOL_TMP_GPR))
4897 (set (match_dup SFBOOL_MTVSR_D_V4SF)
4898 (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
4899 (match_dup SFBOOL_TMP_VSX)))]
4901 rtx bool_a1 = operands[SFBOOL_BOOL_A1];
4902 rtx bool_a2 = operands[SFBOOL_BOOL_A2];
4903 int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
4904 int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
4905 int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
4906 int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
4908 if (CONST_INT_P (bool_a2))
4910 rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
4911 emit_move_insn (tmp_gpr, bool_a2);
4912 operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
4916 int regno_bool_a1 = REGNO (bool_a1);
4917 int regno_bool_a2 = REGNO (bool_a2);
4918 int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
4919 ? regno_bool_a2 : regno_bool_a1);
4920 operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
4923 operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
4924 operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
4925 operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);