2 ;; Copyright (C) 2009-2016 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Iterator for both scalar and vector floating point types supported by VSX
;; DF is grouped with the two vector FP modes so a single pattern (e.g. the
;; vsx_tdiv<mode>3_* expanders below) can cover both the scalar-double and
;; vector forms of an operation.
22 (define_mode_iterator VSX_B [DF V4SF V2DF])
24 ;; Iterator for the 2 64-bit vector types
;; The two vector modes with 64-bit elements: double and doubleword integer.
25 (define_mode_iterator VSX_D [V2DF V2DI])
27 ;; Iterator for the 2 64-bit vector types + 128-bit types that are loaded with
28 ;; lxvd2x to properly handle swapping words on little endian
;; VSX_D plus V1TI: the 128-bit types loaded with lxvd2x, whose doubleword
;; halves must be swapped on little-endian targets (handled by the
;; *vsx_le_perm_* patterns).
29 (define_mode_iterator VSX_LE [V2DF V2DI V1TI])
31 ;; Mode iterator to handle swapping words on little endian for the 128-bit
32 ;; types that go in a single vector register.
;; 128-bit modes that occupy a single vector register: KF/TF only when the
;; mode is the IEEE 128-bit vector format (FLOAT128_VECTOR_P), and TI only
;; when TImode is permitted in VSX registers (-mvsx-timode).
33 (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
34 (TF "FLOAT128_VECTOR_P (TFmode)")
35 (TI "TARGET_VSX_TIMODE")])
37 ;; Iterator for the 2 32-bit vector types
;; The two vector modes with 32-bit elements: single float and word integer.
38 (define_mode_iterator VSX_W [V4SF V4SI])
40 ;; Iterator for the DF types
;; Double-precision modes, vector and scalar, for patterns that emit the
;; same VSX instruction for both.
41 (define_mode_iterator VSX_DF [V2DF DF])
43 ;; Iterator for vector floating point types supported by VSX
;; Vector floating-point modes only (no scalar DF); used by the vector
;; arithmetic patterns (add/sub/mul/div/neg/abs/min/max below).
44 (define_mode_iterator VSX_F [V4SF V2DF])
46 ;; Iterator for logical types supported by VSX
47 (define_mode_iterator VSX_L [V16QI
55 (KF "FLOAT128_VECTOR_P (KFmode)")
56 (TF "FLOAT128_VECTOR_P (TFmode)")])
58 ;; Iterator for memory moves.
59 (define_mode_iterator VSX_M [V16QI
66 (KF "FLOAT128_VECTOR_P (KFmode)")
67 (TF "FLOAT128_VECTOR_P (TFmode)")
68 (TI "TARGET_VSX_TIMODE")])
70 ;; Map into the appropriate load/store name based on the type
71 (define_mode_attr VSm [(V16QI "vw4")
83 ;; Map into the appropriate suffix based on the type
84 (define_mode_attr VSs [(V16QI "sp")
97 ;; Map the register class used
98 (define_mode_attr VSr [(V16QI "v")
112 ;; Map the register class used for float<->int conversions (floating point side)
113 ;; VSr2 is the preferred register class, VSr3 is any register class that will
115 (define_mode_attr VSr2 [(V2DF "wd")
121 (define_mode_attr VSr3 [(V2DF "wa")
127 ;; Map the register class for sp<->dp float conversions, destination
128 (define_mode_attr VSr4 [(SF "ws")
133 ;; Map the register class for sp<->dp float conversions, source
134 (define_mode_attr VSr5 [(SF "ws")
139 ;; The VSX register class that a type can occupy, even if it is not the
140 ;; preferred register class (VSr is the preferred register class that will get
142 (define_mode_attr VSa [(V16QI "wa")
156 ;; Same size integer type for floating point data
157 (define_mode_attr VSi [(V4SF "v4si")
161 (define_mode_attr VSI [(V4SF "V4SI")
165 ;; Word size for same size conversion
166 (define_mode_attr VSc [(V4SF "w")
170 ;; Map into either s or v, depending on whether this is a scalar or vector
172 (define_mode_attr VSv [(V16QI "v")
182 ;; Appropriate type for add ops (and other simple FP ops)
183 (define_mode_attr VStype_simple [(V2DF "vecdouble")
187 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
191 ;; Appropriate type for multiply ops
192 (define_mode_attr VStype_mul [(V2DF "vecdouble")
196 (define_mode_attr VSfptype_mul [(V2DF "fp_mul_d")
200 ;; Appropriate type for divide ops.
201 (define_mode_attr VStype_div [(V2DF "vecdiv")
205 (define_mode_attr VSfptype_div [(V2DF "fp_div_d")
209 ;; Appropriate type for sqrt ops. For now, just lump the vector sqrt with
211 (define_mode_attr VStype_sqrt [(V2DF "dsqrt")
215 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
219 ;; Iterator and modes for sp<->dp conversions
220 ;; Because scalar SF values are represented internally as double, use the
221 ;; V4SF type to represent this rather than SF.
;; Modes participating in single<->double conversions.  V4SF stands in for
;; scalar SF because scalar single-precision values are kept in double
;; format internally (see the comment above).
222 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
224 (define_mode_attr VS_spdp_res [(DF "V4SF")
228 (define_mode_attr VS_spdp_insn [(DF "xscvdpsp")
232 (define_mode_attr VS_spdp_type [(DF "fp")
236 ;; Map the scalar mode for a vector type
237 (define_mode_attr VS_scalar [(V1TI "TI")
245 ;; Map to a double-sized vector mode
246 (define_mode_attr VS_double [(V4SI "V8SI")
252 ;; Map register class for 64-bit element in 128-bit vector for direct moves
254 (define_mode_attr VS_64dm [(V2DF "wk")
257 ;; Map register class for 64-bit element in 128-bit vector for normal register
259 (define_mode_attr VS_64reg [(V2DF "ws")
262 ;; Iterators for loading constants with xxspltib
;; VSINT_84: integer modes with 8-byte (V2DI, DI) and 4-byte (V4SI) elements.
;; VSINT_842: the three vector integer modes handled by the xxspltib
;; splat/extend patterns (xxspltib_<mode>_nosplit, *xxspltib_<mode>_split).
263 (define_mode_iterator VSINT_84 [V4SI V2DI DI])
264 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
266 ;; Constants for creating unspecs
267 (define_c_enum "unspec"
293 UNSPEC_VSX_XVCVDPSXDS
294 UNSPEC_VSX_XVCVDPUXDS
295 UNSPEC_VSX_SIGN_EXTEND
301 ;; The patterns for LE permuted loads and stores come before the general
302 ;; VSX moves so they match first.
303 (define_insn_and_split "*vsx_le_perm_load_<mode>"
304 [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
305 (match_operand:VSX_LE 1 "memory_operand" "Z"))]
306 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
308 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
312 (parallel [(const_int 1) (const_int 0)])))
316 (parallel [(const_int 1) (const_int 0)])))]
319 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
323 [(set_attr "type" "vecload")
324 (set_attr "length" "8")])
326 (define_insn_and_split "*vsx_le_perm_load_<mode>"
327 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
328 (match_operand:VSX_W 1 "memory_operand" "Z"))]
329 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
331 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
335 (parallel [(const_int 2) (const_int 3)
336 (const_int 0) (const_int 1)])))
340 (parallel [(const_int 2) (const_int 3)
341 (const_int 0) (const_int 1)])))]
344 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
348 [(set_attr "type" "vecload")
349 (set_attr "length" "8")])
351 (define_insn_and_split "*vsx_le_perm_load_v8hi"
352 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
353 (match_operand:V8HI 1 "memory_operand" "Z"))]
354 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
356 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
360 (parallel [(const_int 4) (const_int 5)
361 (const_int 6) (const_int 7)
362 (const_int 0) (const_int 1)
363 (const_int 2) (const_int 3)])))
367 (parallel [(const_int 4) (const_int 5)
368 (const_int 6) (const_int 7)
369 (const_int 0) (const_int 1)
370 (const_int 2) (const_int 3)])))]
373 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
377 [(set_attr "type" "vecload")
378 (set_attr "length" "8")])
380 (define_insn_and_split "*vsx_le_perm_load_v16qi"
381 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
382 (match_operand:V16QI 1 "memory_operand" "Z"))]
383 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
385 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
389 (parallel [(const_int 8) (const_int 9)
390 (const_int 10) (const_int 11)
391 (const_int 12) (const_int 13)
392 (const_int 14) (const_int 15)
393 (const_int 0) (const_int 1)
394 (const_int 2) (const_int 3)
395 (const_int 4) (const_int 5)
396 (const_int 6) (const_int 7)])))
400 (parallel [(const_int 8) (const_int 9)
401 (const_int 10) (const_int 11)
402 (const_int 12) (const_int 13)
403 (const_int 14) (const_int 15)
404 (const_int 0) (const_int 1)
405 (const_int 2) (const_int 3)
406 (const_int 4) (const_int 5)
407 (const_int 6) (const_int 7)])))]
410 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
414 [(set_attr "type" "vecload")
415 (set_attr "length" "8")])
417 (define_insn "*vsx_le_perm_store_<mode>"
418 [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
419 (match_operand:VSX_LE 1 "vsx_register_operand" "+<VSa>"))]
420 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
422 [(set_attr "type" "vecstore")
423 (set_attr "length" "12")])
426 [(set (match_operand:VSX_LE 0 "memory_operand" "")
427 (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
428 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
432 (parallel [(const_int 1) (const_int 0)])))
436 (parallel [(const_int 1) (const_int 0)])))]
438 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
442 ;; The post-reload split requires that we re-permute the source
443 ;; register in case it is still live.
445 [(set (match_operand:VSX_LE 0 "memory_operand" "")
446 (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
447 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
451 (parallel [(const_int 1) (const_int 0)])))
455 (parallel [(const_int 1) (const_int 0)])))
459 (parallel [(const_int 1) (const_int 0)])))]
462 (define_insn "*vsx_le_perm_store_<mode>"
463 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
464 (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
465 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
467 [(set_attr "type" "vecstore")
468 (set_attr "length" "12")])
471 [(set (match_operand:VSX_W 0 "memory_operand" "")
472 (match_operand:VSX_W 1 "vsx_register_operand" ""))]
473 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
477 (parallel [(const_int 2) (const_int 3)
478 (const_int 0) (const_int 1)])))
482 (parallel [(const_int 2) (const_int 3)
483 (const_int 0) (const_int 1)])))]
485 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
489 ;; The post-reload split requires that we re-permute the source
490 ;; register in case it is still live.
492 [(set (match_operand:VSX_W 0 "memory_operand" "")
493 (match_operand:VSX_W 1 "vsx_register_operand" ""))]
494 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
498 (parallel [(const_int 2) (const_int 3)
499 (const_int 0) (const_int 1)])))
503 (parallel [(const_int 2) (const_int 3)
504 (const_int 0) (const_int 1)])))
508 (parallel [(const_int 2) (const_int 3)
509 (const_int 0) (const_int 1)])))]
512 (define_insn "*vsx_le_perm_store_v8hi"
513 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
514 (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
515 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
517 [(set_attr "type" "vecstore")
518 (set_attr "length" "12")])
521 [(set (match_operand:V8HI 0 "memory_operand" "")
522 (match_operand:V8HI 1 "vsx_register_operand" ""))]
523 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
527 (parallel [(const_int 4) (const_int 5)
528 (const_int 6) (const_int 7)
529 (const_int 0) (const_int 1)
530 (const_int 2) (const_int 3)])))
534 (parallel [(const_int 4) (const_int 5)
535 (const_int 6) (const_int 7)
536 (const_int 0) (const_int 1)
537 (const_int 2) (const_int 3)])))]
539 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
543 ;; The post-reload split requires that we re-permute the source
544 ;; register in case it is still live.
546 [(set (match_operand:V8HI 0 "memory_operand" "")
547 (match_operand:V8HI 1 "vsx_register_operand" ""))]
548 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
552 (parallel [(const_int 4) (const_int 5)
553 (const_int 6) (const_int 7)
554 (const_int 0) (const_int 1)
555 (const_int 2) (const_int 3)])))
559 (parallel [(const_int 4) (const_int 5)
560 (const_int 6) (const_int 7)
561 (const_int 0) (const_int 1)
562 (const_int 2) (const_int 3)])))
566 (parallel [(const_int 4) (const_int 5)
567 (const_int 6) (const_int 7)
568 (const_int 0) (const_int 1)
569 (const_int 2) (const_int 3)])))]
572 (define_insn "*vsx_le_perm_store_v16qi"
573 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
574 (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
575 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
577 [(set_attr "type" "vecstore")
578 (set_attr "length" "12")])
581 [(set (match_operand:V16QI 0 "memory_operand" "")
582 (match_operand:V16QI 1 "vsx_register_operand" ""))]
583 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
587 (parallel [(const_int 8) (const_int 9)
588 (const_int 10) (const_int 11)
589 (const_int 12) (const_int 13)
590 (const_int 14) (const_int 15)
591 (const_int 0) (const_int 1)
592 (const_int 2) (const_int 3)
593 (const_int 4) (const_int 5)
594 (const_int 6) (const_int 7)])))
598 (parallel [(const_int 8) (const_int 9)
599 (const_int 10) (const_int 11)
600 (const_int 12) (const_int 13)
601 (const_int 14) (const_int 15)
602 (const_int 0) (const_int 1)
603 (const_int 2) (const_int 3)
604 (const_int 4) (const_int 5)
605 (const_int 6) (const_int 7)])))]
607 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
611 ;; The post-reload split requires that we re-permute the source
612 ;; register in case it is still live.
614 [(set (match_operand:V16QI 0 "memory_operand" "")
615 (match_operand:V16QI 1 "vsx_register_operand" ""))]
616 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
620 (parallel [(const_int 8) (const_int 9)
621 (const_int 10) (const_int 11)
622 (const_int 12) (const_int 13)
623 (const_int 14) (const_int 15)
624 (const_int 0) (const_int 1)
625 (const_int 2) (const_int 3)
626 (const_int 4) (const_int 5)
627 (const_int 6) (const_int 7)])))
631 (parallel [(const_int 8) (const_int 9)
632 (const_int 10) (const_int 11)
633 (const_int 12) (const_int 13)
634 (const_int 14) (const_int 15)
635 (const_int 0) (const_int 1)
636 (const_int 2) (const_int 3)
637 (const_int 4) (const_int 5)
638 (const_int 6) (const_int 7)])))
642 (parallel [(const_int 8) (const_int 9)
643 (const_int 10) (const_int 11)
644 (const_int 12) (const_int 13)
645 (const_int 14) (const_int 15)
646 (const_int 0) (const_int 1)
647 (const_int 2) (const_int 3)
648 (const_int 4) (const_int 5)
649 (const_int 6) (const_int 7)])))]
652 ;; Little endian word swapping for 128-bit types that are either scalars or the
653 ;; special V1TI container class, which it is not appropriate to use vec_select
655 (define_insn "*vsx_le_permute_<mode>"
656 [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
658 (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
660 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
662 xxpermdi %x0,%x1,%x1,2
665 [(set_attr "length" "4")
666 (set_attr "type" "vecperm,vecload,vecstore")])
668 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
669 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
672 (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
675 "!BYTES_BIG_ENDIAN && TARGET_VSX"
680 [(set (match_dup 0) (match_dup 1))]
682 if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
684 emit_note (NOTE_INSN_DELETED);
688 [(set_attr "length" "0,4")
689 (set_attr "type" "vecsimple")])
691 (define_insn_and_split "*vsx_le_perm_load_<mode>"
692 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>")
693 (match_operand:VSX_LE_128 1 "memory_operand" "Z"))]
694 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
696 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
698 (rotate:VSX_LE_128 (match_dup 1)
701 (rotate:VSX_LE_128 (match_dup 2)
705 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
709 [(set_attr "type" "vecload")
710 (set_attr "length" "8")])
712 (define_insn "*vsx_le_perm_store_<mode>"
713 [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z")
714 (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>"))]
715 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
717 [(set_attr "type" "vecstore")
718 (set_attr "length" "12")])
721 [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
722 (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
723 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
725 (rotate:VSX_LE_128 (match_dup 1)
728 (rotate:VSX_LE_128 (match_dup 2)
731 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
735 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
736 ;; VSX registers on a little endian system. The vector types and IEEE 128-bit
737 ;; floating point are handled by the more generic swap elimination pass.
739 [(set (match_operand:TI 0 "vsx_register_operand" "")
740 (rotate:TI (match_operand:TI 1 "vsx_register_operand" "")
742 (set (match_operand:TI 2 "vsx_register_operand" "")
743 (rotate:TI (match_dup 0)
745 "!BYTES_BIG_ENDIAN && TARGET_VSX && TARGET_VSX_TIMODE && !TARGET_P9_VECTOR
746 && (rtx_equal_p (operands[0], operands[2])
747 || peep2_reg_dead_p (2, operands[0]))"
748 [(set (match_dup 2) (match_dup 1))])
750 ;; The post-reload split requires that we re-permute the source
751 ;; register in case it is still live.
753 [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
754 (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
755 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
757 (rotate:VSX_LE_128 (match_dup 1)
760 (rotate:VSX_LE_128 (match_dup 1)
763 (rotate:VSX_LE_128 (match_dup 1)
767 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
768 ;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
769 (define_insn "xxspltib_v16qi"
770 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
771 (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
774 operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
775 return "xxspltib %x0,%2";
777 [(set_attr "type" "vecperm")])
779 (define_insn "xxspltib_<mode>_nosplit"
780 [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
781 (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
784 rtx op1 = operands[1];
788 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
792 operands[2] = GEN_INT (value & 0xff);
793 return "xxspltib %x0,%2";
795 [(set_attr "type" "vecperm")])
797 (define_insn_and_split "*xxspltib_<mode>_split"
798 [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
799 (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
807 rtx op0 = operands[0];
808 rtx op1 = operands[1];
809 rtx tmp = ((can_create_pseudo_p ())
810 ? gen_reg_rtx (V16QImode)
811 : gen_lowpart (V16QImode, op0));
813 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
817 emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
819 if (<MODE>mode == V2DImode)
820 emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
822 else if (<MODE>mode == V4SImode)
823 emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
825 else if (<MODE>mode == V8HImode)
826 emit_insn (gen_altivec_vupkhsb (op0, tmp));
833 [(set_attr "type" "vecperm")
834 (set_attr "length" "8")])
837 ;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTISB
838 ;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
839 ;; all 1's, since the machine does not have to wait for the previous
840 ;; instruction using the register being set (such as a store waiting on a slow
841 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
843 ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
844 ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
845 ;; VSX 0/-1 GPR 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
846 (define_insn "*vsx_mov<mode>_64bit"
847 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
848 "=ZwO, <VSa>, <VSa>, r, we, ?wQ,
849 ?&r, ??r, ??Y, ??r, wo, v,
850 ?<VSa>, *r, v, ??r, wZ, v")
852 (match_operand:VSX_M 1 "input_operand"
853 "<VSa>, ZwO, <VSa>, we, r, r,
854 wQ, Y, r, r, wE, jwM,
855 ?jwM, jwM, W, W, v, wZ"))]
857 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
858 && (register_operand (operands[0], <MODE>mode)
859 || register_operand (operands[1], <MODE>mode))"
861 return rs6000_output_move_128bit (operands);
864 "vecstore, vecload, vecsimple, mffgpr, mftgpr, load,
865 store, load, store, *, vecsimple, vecsimple,
866 vecsimple, *, *, *, vecstore, vecload")
871 4, 8, 20, 20, 4, 4")])
873 ;; VSX store VSX load VSX move GPR load GPR store GPR move
874 ;; XXSPLTIB VSPLTISW VSX 0/-1 GPR 0/-1 VMX const GPR const
875 ;; LVX (VMX) STVX (VMX)
876 (define_insn "*vsx_mov<mode>_32bit"
877 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
878 "=ZwO, <VSa>, <VSa>, ??r, ??Y, ??r,
879 wo, v, ?<VSa>, *r, v, ??r,
882 (match_operand:VSX_M 1 "input_operand"
883 "<VSa>, ZwO, <VSa>, Y, r, r,
884 wE, jwM, ?jwM, jwM, W, W,
887 "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
888 && (register_operand (operands[0], <MODE>mode)
889 || register_operand (operands[1], <MODE>mode))"
891 return rs6000_output_move_128bit (operands);
894 "vecstore, vecload, vecsimple, load, store, *,
895 vecsimple, vecsimple, vecsimple, *, *, *,
899 "4, 4, 4, 16, 16, 16,
903 ;; Explicit load/store expanders for the builtin functions
904 (define_expand "vsx_load_<mode>"
905 [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
906 (match_operand:VSX_M 1 "memory_operand" ""))]
907 "VECTOR_MEM_VSX_P (<MODE>mode)"
910 (define_expand "vsx_store_<mode>"
911 [(set (match_operand:VSX_M 0 "memory_operand" "")
912 (match_operand:VSX_M 1 "vsx_register_operand" ""))]
913 "VECTOR_MEM_VSX_P (<MODE>mode)"
916 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
917 ;; when you really want their element-reversing behavior.
918 (define_insn "vsx_ld_elemrev_v2di"
919 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
921 (match_operand:V2DI 1 "memory_operand" "Z")
922 (parallel [(const_int 1) (const_int 0)])))]
923 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
925 [(set_attr "type" "vecload")])
927 (define_insn "vsx_ld_elemrev_v2df"
928 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
930 (match_operand:V2DF 1 "memory_operand" "Z")
931 (parallel [(const_int 1) (const_int 0)])))]
932 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
934 [(set_attr "type" "vecload")])
936 (define_insn "vsx_ld_elemrev_v4si"
937 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
939 (match_operand:V4SI 1 "memory_operand" "Z")
940 (parallel [(const_int 3) (const_int 2)
941 (const_int 1) (const_int 0)])))]
942 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
944 [(set_attr "type" "vecload")])
946 (define_insn "vsx_ld_elemrev_v4sf"
947 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
949 (match_operand:V4SF 1 "memory_operand" "Z")
950 (parallel [(const_int 3) (const_int 2)
951 (const_int 1) (const_int 0)])))]
952 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
954 [(set_attr "type" "vecload")])
956 (define_insn "vsx_ld_elemrev_v8hi"
957 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
959 (match_operand:V8HI 1 "memory_operand" "Z")
960 (parallel [(const_int 7) (const_int 6)
961 (const_int 5) (const_int 4)
962 (const_int 3) (const_int 2)
963 (const_int 1) (const_int 0)])))]
964 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
966 [(set_attr "type" "vecload")])
968 (define_insn "vsx_ld_elemrev_v16qi"
969 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
971 (match_operand:V16QI 1 "memory_operand" "Z")
972 (parallel [(const_int 15) (const_int 14)
973 (const_int 13) (const_int 12)
974 (const_int 11) (const_int 10)
975 (const_int 9) (const_int 8)
976 (const_int 7) (const_int 6)
977 (const_int 5) (const_int 4)
978 (const_int 3) (const_int 2)
979 (const_int 1) (const_int 0)])))]
980 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
982 [(set_attr "type" "vecload")])
984 (define_insn "vsx_st_elemrev_v2df"
985 [(set (match_operand:V2DF 0 "memory_operand" "=Z")
987 (match_operand:V2DF 1 "vsx_register_operand" "wa")
988 (parallel [(const_int 1) (const_int 0)])))]
989 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
991 [(set_attr "type" "vecstore")])
993 (define_insn "vsx_st_elemrev_v2di"
994 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
996 (match_operand:V2DI 1 "vsx_register_operand" "wa")
997 (parallel [(const_int 1) (const_int 0)])))]
998 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1000 [(set_attr "type" "vecstore")])
1002 (define_insn "vsx_st_elemrev_v4sf"
1003 [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1005 (match_operand:V4SF 1 "vsx_register_operand" "wa")
1006 (parallel [(const_int 3) (const_int 2)
1007 (const_int 1) (const_int 0)])))]
1008 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1010 [(set_attr "type" "vecstore")])
1012 (define_insn "vsx_st_elemrev_v4si"
1013 [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1015 (match_operand:V4SI 1 "vsx_register_operand" "wa")
1016 (parallel [(const_int 3) (const_int 2)
1017 (const_int 1) (const_int 0)])))]
1018 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1020 [(set_attr "type" "vecstore")])
1022 (define_insn "vsx_st_elemrev_v8hi"
1023 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1025 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1026 (parallel [(const_int 7) (const_int 6)
1027 (const_int 5) (const_int 4)
1028 (const_int 3) (const_int 2)
1029 (const_int 1) (const_int 0)])))]
1030 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1032 [(set_attr "type" "vecstore")])
1034 (define_insn "vsx_st_elemrev_v16qi"
1035 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1037 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1038 (parallel [(const_int 15) (const_int 14)
1039 (const_int 13) (const_int 12)
1040 (const_int 11) (const_int 10)
1041 (const_int 9) (const_int 8)
1042 (const_int 7) (const_int 6)
1043 (const_int 5) (const_int 4)
1044 (const_int 3) (const_int 2)
1045 (const_int 1) (const_int 0)])))]
1046 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1048 [(set_attr "type" "vecstore")])
1051 ;; VSX vector floating point arithmetic instructions. The VSX scalar
1052 ;; instructions are now combined with the insn for the traditional floating
1054 (define_insn "*vsx_add<mode>3"
1055 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1056 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1057 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1058 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1059 "xvadd<VSs> %x0,%x1,%x2"
1060 [(set_attr "type" "<VStype_simple>")
1061 (set_attr "fp_type" "<VSfptype_simple>")])
1063 (define_insn "*vsx_sub<mode>3"
1064 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1065 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1066 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1067 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1068 "xvsub<VSs> %x0,%x1,%x2"
1069 [(set_attr "type" "<VStype_simple>")
1070 (set_attr "fp_type" "<VSfptype_simple>")])
1072 (define_insn "*vsx_mul<mode>3"
1073 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1074 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1075 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1076 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1077 "xvmul<VSs> %x0,%x1,%x2"
1078 [(set_attr "type" "<VStype_simple>")
1079 (set_attr "fp_type" "<VSfptype_mul>")])
1081 ; Emulate vector with scalar for vec_mul in V2DImode
1082 (define_insn_and_split "vsx_mul_v2di"
1083 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1084 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1085 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1087 "VECTOR_MEM_VSX_P (V2DImode)"
1089 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1093 rtx op0 = operands[0];
1094 rtx op1 = operands[1];
1095 rtx op2 = operands[2];
1096 rtx op3 = gen_reg_rtx (DImode);
1097 rtx op4 = gen_reg_rtx (DImode);
1098 rtx op5 = gen_reg_rtx (DImode);
1099 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1100 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1101 emit_insn (gen_muldi3 (op5, op3, op4));
1102 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1103 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1104 emit_insn (gen_muldi3 (op3, op3, op4));
1105 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1108 [(set_attr "type" "mul")])
1110 (define_insn "*vsx_div<mode>3"
1111 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1112 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1113 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1114 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1115 "xvdiv<VSs> %x0,%x1,%x2"
1116 [(set_attr "type" "<VStype_div>")
1117 (set_attr "fp_type" "<VSfptype_div>")])
1119 ; Emulate vector with scalar for vec_div in V2DImode
1120 (define_insn_and_split "vsx_div_v2di"
1121 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1122 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1123 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1125 "VECTOR_MEM_VSX_P (V2DImode)"
1127 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1131 rtx op0 = operands[0];
1132 rtx op1 = operands[1];
1133 rtx op2 = operands[2];
1134 rtx op3 = gen_reg_rtx (DImode);
1135 rtx op4 = gen_reg_rtx (DImode);
1136 rtx op5 = gen_reg_rtx (DImode);
1137 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1138 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1139 emit_insn (gen_divdi3 (op5, op3, op4));
1140 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1141 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1142 emit_insn (gen_divdi3 (op3, op3, op4));
1143 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1146 [(set_attr "type" "div")])
1148 (define_insn_and_split "vsx_udiv_v2di"
1149 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1150 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1151 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1153 "VECTOR_MEM_VSX_P (V2DImode)"
1155 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1159 rtx op0 = operands[0];
1160 rtx op1 = operands[1];
1161 rtx op2 = operands[2];
1162 rtx op3 = gen_reg_rtx (DImode);
1163 rtx op4 = gen_reg_rtx (DImode);
1164 rtx op5 = gen_reg_rtx (DImode);
1165 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1166 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1167 emit_insn (gen_udivdi3 (op5, op3, op4));
1168 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1169 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1170 emit_insn (gen_udivdi3 (op3, op3, op4));
1171 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1174 [(set_attr "type" "div")])
1176 ;; *tdiv* instruction returning the FG flag
;; NOTE(review): gaps in the embedded line numbering (1178, 1181, 1184, 1186,
;; 1188-1189, ...) indicate lines were lost in extraction -- the UNSPEC names,
;; some closing brackets, and parts of the expander bodies are missing.
;; Code left byte-identical; do not hand-reconstruct without the original file.
;; Expander: test-for-software-divide, exposing the FG bit via (gt:SI ...) on a
;; scratch CCFP register allocated in the preparation statements.
1177 (define_expand "vsx_tdiv<mode>3_fg"
1179 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1180 (match_operand:VSX_B 2 "vsx_register_operand" "")]
1182 (set (match_operand:SI 0 "gpc_reg_operand" "")
1183 (gt:SI (match_dup 3)
1185 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1187 operands[3] = gen_reg_rtx (CCFPmode);
1190 ;; *tdiv* instruction returning the FE flag
;; Same shape as the _fg expander but exposes the FE bit via (eq:SI ...).
1191 (define_expand "vsx_tdiv<mode>3_fe"
1193 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1194 (match_operand:VSX_B 2 "vsx_register_operand" "")]
1196 (set (match_operand:SI 0 "gpc_reg_operand" "")
1197 (eq:SI (match_dup 3)
1199 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1201 operands[3] = gen_reg_rtx (CCFPmode);
;; Insn matched by both expanders above: emits x{s,v}tdiv{dp,sp} into a CR
;; field ("=x" constraint).
1204 (define_insn "*vsx_tdiv<mode>3_internal"
1205 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1206 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1207 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1209 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1210 "x<VSv>tdiv<VSs> %0,%x1,%x2"
1211 [(set_attr "type" "<VStype_simple>")
1212 (set_attr "fp_type" "<VSfptype_simple>")])
;; Reciprocal estimate (xvre*); UNSPEC name and output template lines are
;; among those lost in extraction (1217, 1219 missing).
1214 (define_insn "vsx_fre<mode>2"
1215 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1216 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1218 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1220 [(set_attr "type" "<VStype_simple>")
1221 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector FP negate: xvnegsp/xvnegdp over the VSX_F (V4SF/V2DF) iterator.
1223 (define_insn "*vsx_neg<mode>2"
1224 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1225 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1226 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1227 "xvneg<VSs> %x0,%x1"
1228 [(set_attr "type" "<VStype_simple>")
1229 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector FP absolute value: xvabssp/xvabsdp.
1231 (define_insn "*vsx_abs<mode>2"
1232 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1233 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1234 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1235 "xvabs<VSs> %x0,%x1"
1236 [(set_attr "type" "<VStype_simple>")
1237 (set_attr "fp_type" "<VSfptype_simple>")])
;; Negative absolute value (xvnabs*).  NOTE(review): lines 1241-1242 (the
;; neg/abs RTL wrapping operand 1) were lost in extraction; code untouched.
1239 (define_insn "vsx_nabs<mode>2"
1240 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1243 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1244 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1245 "xvnabs<VSs> %x0,%x1"
1246 [(set_attr "type" "<VStype_simple>")
1247 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector FP signed maximum (xvmaxsp/xvmaxdp).
1249 (define_insn "vsx_smax<mode>3"
1250 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1251 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1252 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1253 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1254 "xvmax<VSs> %x0,%x1,%x2"
1255 [(set_attr "type" "<VStype_simple>")
1256 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector FP signed minimum (xvminsp/xvmindp).
1258 (define_insn "*vsx_smin<mode>3"
1259 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1260 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1261 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1262 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1263 "xvmin<VSs> %x0,%x1,%x2"
1264 [(set_attr "type" "<VStype_simple>")
1265 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector FP square root (xvsqrtsp/xvsqrtdp); uses the sqrt-specific
;; scheduling attributes rather than the "simple" ones.
1267 (define_insn "*vsx_sqrt<mode>2"
1268 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1269 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1270 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1271 "xvsqrt<VSs> %x0,%x1"
1272 [(set_attr "type" "<VStype_sqrt>")
1273 (set_attr "fp_type" "<VSfptype_sqrt>")])
;; Reciprocal square-root estimate (xvrsqrte*).  NOTE(review): line 1278 (the
;; UNSPEC name closing the vector of operands) was lost in extraction.
1275 (define_insn "*vsx_rsqrte<mode>2"
1276 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1277 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1279 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1280 "xvrsqrte<VSs> %x0,%x1"
1281 [(set_attr "type" "<VStype_simple>")
1282 (set_attr "fp_type" "<VSfptype_simple>")])
1284 ;; *tsqrt* returning the fg flag
;; Expander exposing FG from the test-sqrt instruction, mirroring the
;; vsx_tdiv<mode>3_fg shape; interior lines missing here as well.
1285 (define_expand "vsx_tsqrt<mode>2_fg"
1287 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1289 (set (match_operand:SI 0 "gpc_reg_operand" "")
1290 (gt:SI (match_dup 3)
1292 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1294 operands[3] = gen_reg_rtx (CCFPmode);
1297 ;; *tsqrt* returning the fe flag
1298 (define_expand "vsx_tsqrt<mode>2_fe"
1300 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1302 (set (match_operand:SI 0 "gpc_reg_operand" "")
1303 (eq:SI (match_dup 3)
1305 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1307 operands[3] = gen_reg_rtx (CCFPmode);
;; Insn matched by the two tsqrt expanders: emits x{s,v}tsqrt{dp,sp} into a
;; CR field.  NOTE(review): line 1313 (UNSPEC name) missing.
1310 (define_insn "*vsx_tsqrt<mode>2_internal"
1311 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1312 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1314 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1315 "x<VSv>tsqrt<VSs> %0,%x1"
1316 [(set_attr "type" "<VStype_simple>")
1317 (set_attr "fp_type" "<VSfptype_simple>")])
1319 ;; Fused vector multiply/add instructions. Support the classical Altivec
1320 ;; versions of fma, which allows the target to be a separate register from the
1321 ;; 3 inputs. Under VSX, the target must be either the addend or the first
;; NOTE(review): the sentence above is truncated -- lines 1322-1323 were lost
;; in extraction, as were the (fma: ...) RTL lines inside each pattern below
;; (1326, 1341, 1355/1358, 1370-1371, 1386-1387/1390, 1403-1404/1407).
;; All code left byte-identical.
;; V4SF fma: the "a" forms overwrite the addend (%x2 tied to 0), the "m"
;; forms overwrite the multiplicand; the last alternative is Altivec vmaddfp.
1324 (define_insn "*vsx_fmav4sf4"
1325 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1327 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1328 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1329 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1330 "VECTOR_UNIT_VSX_P (V4SFmode)"
1332 xvmaddasp %x0,%x1,%x2
1333 xvmaddmsp %x0,%x1,%x3
1334 xvmaddasp %x0,%x1,%x2
1335 xvmaddmsp %x0,%x1,%x3
1336 vmaddfp %0,%1,%2,%3"
1337 [(set_attr "type" "vecfloat")])
;; V2DF fma; no Altivec fallback alternative exists for double.
1339 (define_insn "*vsx_fmav2df4"
1340 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1342 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1343 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1344 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1345 "VECTOR_UNIT_VSX_P (V2DFmode)"
1347 xvmaddadp %x0,%x1,%x2
1348 xvmaddmdp %x0,%x1,%x3
1349 xvmaddadp %x0,%x1,%x2
1350 xvmaddmdp %x0,%x1,%x3"
1351 [(set_attr "type" "vecdouble")])
;; Fused multiply-subtract (xvmsuba*/xvmsubm*) over VSX_F.
1353 (define_insn "*vsx_fms<mode>4"
1354 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1356 (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1357 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1359 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1360 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1362 xvmsuba<VSs> %x0,%x1,%x2
1363 xvmsubm<VSs> %x0,%x1,%x3
1364 xvmsuba<VSs> %x0,%x1,%x2
1365 xvmsubm<VSs> %x0,%x1,%x3"
1366 [(set_attr "type" "<VStype_mul>")])
;; Negated fused multiply-add (xvnmadda*/xvnmaddm*).
1368 (define_insn "*vsx_nfma<mode>4"
1369 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1372 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1373 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1374 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1375 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1377 xvnmadda<VSs> %x0,%x1,%x2
1378 xvnmaddm<VSs> %x0,%x1,%x3
1379 xvnmadda<VSs> %x0,%x1,%x2
1380 xvnmaddm<VSs> %x0,%x1,%x3"
1381 [(set_attr "type" "<VStype_mul>")
1382 (set_attr "fp_type" "<VSfptype_mul>")])
;; Negated fused multiply-subtract, V4SF, with Altivec vnmsubfp fallback.
1384 (define_insn "*vsx_nfmsv4sf4"
1385 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1388 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1389 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1391 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1392 "VECTOR_UNIT_VSX_P (V4SFmode)"
1394 xvnmsubasp %x0,%x1,%x2
1395 xvnmsubmsp %x0,%x1,%x3
1396 xvnmsubasp %x0,%x1,%x2
1397 xvnmsubmsp %x0,%x1,%x3
1398 vnmsubfp %0,%1,%2,%3"
1399 [(set_attr "type" "vecfloat")])
;; Negated fused multiply-subtract, V2DF.
1401 (define_insn "*vsx_nfmsv2df4"
1402 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1405 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1406 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1408 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
1409 "VECTOR_UNIT_VSX_P (V2DFmode)"
1411 xvnmsubadp %x0,%x1,%x2
1412 xvnmsubmdp %x0,%x1,%x3
1413 xvnmsubadp %x0,%x1,%x2
1414 xvnmsubmdp %x0,%x1,%x3"
1415 [(set_attr "type" "vecdouble")])
1417 ;; Vector conditional expressions (no scalar version for these instructions)
;; Vector compare-equal producing an all-ones/all-zeros mask per element.
1418 (define_insn "vsx_eq<mode>"
1419 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1420 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1421 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1422 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1423 "xvcmpeq<VSs> %x0,%x1,%x2"
1424 [(set_attr "type" "<VStype_simple>")
1425 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector compare-greater-than.
1427 (define_insn "vsx_gt<mode>"
1428 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1429 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1430 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1431 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1432 "xvcmpgt<VSs> %x0,%x1,%x2"
1433 [(set_attr "type" "<VStype_simple>")
1434 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector compare-greater-or-equal.
1436 (define_insn "*vsx_ge<mode>"
1437 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1438 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1439 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1440 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1441 "xvcmpge<VSs> %x0,%x1,%x2"
1442 [(set_attr "type" "<VStype_simple>")
1443 (set_attr "fp_type" "<VSfptype_simple>")])
1445 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
1446 ;; indicate a combined status
;; NOTE(review): the dot-form (record) compare patterns below are missing
;; interior lines (1448-1449, 1452, 1455, ...); left byte-identical.
1447 (define_insn "*vsx_eq_<mode>_p"
1450 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1451 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1453 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1454 (eq:VSX_F (match_dup 1)
1456 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1457 "xvcmpeq<VSs>. %x0,%x1,%x2"
1458 [(set_attr "type" "<VStype_simple>")])
1460 (define_insn "*vsx_gt_<mode>_p"
1463 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1464 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1466 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1467 (gt:VSX_F (match_dup 1)
1469 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1470 "xvcmpgt<VSs>. %x0,%x1,%x2"
1471 [(set_attr "type" "<VStype_simple>")])
1473 (define_insn "*vsx_ge_<mode>_p"
1476 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1477 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1479 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1480 (ge:VSX_F (match_dup 1)
1482 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1483 "xvcmpge<VSs>. %x0,%x1,%x2"
1484 [(set_attr "type" "<VStype_simple>")])
;; Vector select: xxsel picks per-bit between operands 2 and 3 under the mask
;; from operand 1 compared against zero.  Note the operand order in the
;; template: xxsel %x0,%x3,%x2,%x1.  NOTE(review): line 1489 (presumably the
;; if_then_else wrapper) was lost in extraction.
1487 (define_insn "*vsx_xxsel<mode>"
1488 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1490 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1491 (match_operand:VSX_L 4 "zero_constant" ""))
1492 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1493 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1494 "VECTOR_MEM_VSX_P (<MODE>mode)"
1495 "xxsel %x0,%x3,%x2,%x1"
1496 [(set_attr "type" "vecperm")])
;; Unsigned-comparison variant of the select above (CCUNS instead of CC).
1498 (define_insn "*vsx_xxsel<mode>_uns"
1499 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1501 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1502 (match_operand:VSX_L 4 "zero_constant" ""))
1503 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1504 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1505 "VECTOR_MEM_VSX_P (<MODE>mode)"
1506 "xxsel %x0,%x3,%x2,%x1"
1507 [(set_attr "type" "vecperm")])
;; Copy sign (xvcpsgn*): sign taken from operand 2, magnitude from operand 1
;; (template swaps them: %x2,%x1).  Lines 1512/1515 missing from extraction.
1510 (define_insn "vsx_copysign<mode>3"
1511 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1513 [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1514 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
1516 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1517 "xvcpsgn<VSs> %x0,%x2,%x1"
1518 [(set_attr "type" "<VStype_simple>")
1519 (set_attr "fp_type" "<VSfptype_simple>")])
1521 ;; For the conversions, limit the register class for the integer value to be
1522 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
1523 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
1524 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
1525 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
1526 ;; in allowing virtual registers.
;; Signed integer vector -> FP vector conversion (xvcvsx{w,d}{sp,dp}).
1527 (define_insn "vsx_float<VSi><mode>2"
1528 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1529 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1530 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1531 "xvcvsx<VSc><VSs> %x0,%x1"
1532 [(set_attr "type" "<VStype_simple>")
1533 (set_attr "fp_type" "<VSfptype_simple>")])
;; Unsigned integer vector -> FP vector conversion (xvcvux*).
1535 (define_insn "vsx_floatuns<VSi><mode>2"
1536 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1537 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1538 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1539 "xvcvux<VSc><VSs> %x0,%x1"
1540 [(set_attr "type" "<VStype_simple>")
1541 (set_attr "fp_type" "<VSfptype_simple>")])
;; FP vector -> signed integer vector, truncating (x{s,v}cv*sx*s).
1543 (define_insn "vsx_fix_trunc<mode><VSi>2"
1544 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1545 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1546 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1547 "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
1548 [(set_attr "type" "<VStype_simple>")
1549 (set_attr "fp_type" "<VSfptype_simple>")])
;; FP vector -> unsigned integer vector, truncating (x{s,v}cv*ux*s).
1551 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
1552 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1553 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1554 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1555 "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
1556 [(set_attr "type" "<VStype_simple>")
1557 (set_attr "fp_type" "<VSfptype_simple>")])
1559 ;; Math rounding functions
;; Round to nearest integral value (x{s,v}r{dp,sp}i).
1560 (define_insn "vsx_x<VSv>r<VSs>i"
1561 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1562 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1563 UNSPEC_VSX_ROUND_I))]
1564 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1565 "x<VSv>r<VSs>i %x0,%x1"
1566 [(set_attr "type" "<VStype_simple>")
1567 (set_attr "fp_type" "<VSfptype_simple>")])
;; Round using current rounding mode (x{s,v}r{dp,sp}ic).
1569 (define_insn "vsx_x<VSv>r<VSs>ic"
1570 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1571 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1572 UNSPEC_VSX_ROUND_IC))]
1573 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1574 "x<VSv>r<VSs>ic %x0,%x1"
1575 [(set_attr "type" "<VStype_simple>")
1576 (set_attr "fp_type" "<VSfptype_simple>")])
;; Truncate toward zero expressed as (fix ...) -- emits xvr{sp,dp}iz.
1578 (define_insn "vsx_btrunc<mode>2"
1579 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1580 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1581 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1582 "xvr<VSs>iz %x0,%x1"
1583 [(set_attr "type" "<VStype_simple>")
1584 (set_attr "fp_type" "<VSfptype_simple>")])
;; UNSPEC form of truncate toward zero.  NOTE(review): line 1589 (the UNSPEC
;; name) was lost in extraction.
1586 (define_insn "*vsx_b2trunc<mode>2"
1587 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1588 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1590 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1591 "x<VSv>r<VSs>iz %x0,%x1"
1592 [(set_attr "type" "<VStype_simple>")
1593 (set_attr "fp_type" "<VSfptype_simple>")])
;; Round toward minus infinity (xvr*im).  Line 1598 (UNSPEC name) missing.
1595 (define_insn "vsx_floor<mode>2"
1596 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1597 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1599 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1600 "xvr<VSs>im %x0,%x1"
1601 [(set_attr "type" "<VStype_simple>")
1602 (set_attr "fp_type" "<VSfptype_simple>")])
;; Round toward plus infinity (xvr*ip).  Line 1607 (UNSPEC name) missing.
1604 (define_insn "vsx_ceil<mode>2"
1605 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1606 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1608 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1609 "xvr<VSs>ip %x0,%x1"
1610 [(set_attr "type" "<VStype_simple>")
1611 (set_attr "fp_type" "<VSfptype_simple>")])
1614 ;; VSX convert to/from double vector
1616 ;; Convert between single and double precision
1617 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
1618 ;; scalar single precision instructions internally use the double format.
1619 ;; Prefer the altivec registers, since we likely will need to do a vperm
;; Generic single<->double conversion; the concrete mnemonic comes from the
;; <VS_spdp_insn> mode attribute.
1620 (define_insn "vsx_<VS_spdp_insn>"
1621 [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
1622 (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
1623 UNSPEC_VSX_CVSPDP))]
1624 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1625 "<VS_spdp_insn> %x0,%x1"
1626 [(set_attr "type" "<VS_spdp_type>")])
1628 ;; xscvspdp, represent the scalar SF type as V4SF
;; NOTE(review): the output-template lines of the next three patterns
;; (original lines 1634, 1644, 1653) were lost in extraction.
1629 (define_insn "vsx_xscvspdp"
1630 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1631 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1632 UNSPEC_VSX_CVSPDP))]
1633 "VECTOR_UNIT_VSX_P (V4SFmode)"
1635 [(set_attr "type" "fp")])
1637 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
1638 ;; format of scalars is actually DF.
1639 (define_insn "vsx_xscvdpsp_scalar"
1640 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1641 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
1642 UNSPEC_VSX_CVSPDP))]
1643 "VECTOR_UNIT_VSX_P (V4SFmode)"
1645 [(set_attr "type" "fp")])
1647 ;; Same as vsx_xscvspdp, but use SF as the type
1648 (define_insn "vsx_xscvspdp_scalar2"
1649 [(set (match_operand:SF 0 "vsx_register_operand" "=f")
1650 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1651 UNSPEC_VSX_CVSPDP))]
1652 "VECTOR_UNIT_VSX_P (V4SFmode)"
1654 [(set_attr "type" "fp")])
1656 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
;; NOTE(review): the condition and template lines of the "n" (non-signalling)
;; variants below (1661-1662, 1669-1670, 1677-1678, 1686-1687) are missing.
1657 (define_insn "vsx_xscvdpspn"
1658 [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww,?ww")
1659 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
1660 UNSPEC_VSX_CVDPSPN))]
1663 [(set_attr "type" "fp")])
1665 (define_insn "vsx_xscvspdpn"
1666 [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?ws")
1667 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wf,wa")]
1668 UNSPEC_VSX_CVSPDPN))]
1671 [(set_attr "type" "fp")])
1673 (define_insn "vsx_xscvdpspn_scalar"
1674 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,?wa")
1675 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww,ww")]
1676 UNSPEC_VSX_CVDPSPN))]
1679 [(set_attr "type" "fp")])
1681 ;; Used by direct move to move a SFmode value from GPR to VSX register
1682 (define_insn "vsx_xscvspdpn_directmove"
1683 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
1684 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
1685 UNSPEC_VSX_CVSPDPN))]
1688 [(set_attr "type" "fp")])
1690 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
;; Expander: signed V2DI -> V2DF then scale by 2^-scale via rs6000_scale_v2df.
;; NOTE(review): lines 1697, 1702, 1704-1706 (braces / conditional around the
;; scale call) were lost in extraction; code left byte-identical.
1692 (define_expand "vsx_xvcvsxddp_scale"
1693 [(match_operand:V2DF 0 "vsx_register_operand" "")
1694 (match_operand:V2DI 1 "vsx_register_operand" "")
1695 (match_operand:QI 2 "immediate_operand" "")]
1696 "VECTOR_UNIT_VSX_P (V2DFmode)"
1698 rtx op0 = operands[0];
1699 rtx op1 = operands[1];
1700 int scale = INTVAL(operands[2]);
1701 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
1703 rs6000_scale_v2df (op0, op0, -scale);
;; Plain signed V2DI -> V2DF conversion insn (template line 1712 missing).
1707 (define_insn "vsx_xvcvsxddp"
1708 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1709 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1710 UNSPEC_VSX_XVCVSXDDP))]
1711 "VECTOR_UNIT_VSX_P (V2DFmode)"
1713 [(set_attr "type" "vecdouble")])
;; Unsigned counterpart of the scale expander above.
1715 (define_expand "vsx_xvcvuxddp_scale"
1716 [(match_operand:V2DF 0 "vsx_register_operand" "")
1717 (match_operand:V2DI 1 "vsx_register_operand" "")
1718 (match_operand:QI 2 "immediate_operand" "")]
1719 "VECTOR_UNIT_VSX_P (V2DFmode)"
1721 rtx op0 = operands[0];
1722 rtx op1 = operands[1];
1723 int scale = INTVAL(operands[2]);
1724 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
1726 rs6000_scale_v2df (op0, op0, -scale);
;; Plain unsigned V2DI -> V2DF conversion insn (template line 1735 missing).
1730 (define_insn "vsx_xvcvuxddp"
1731 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1732 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1733 UNSPEC_VSX_XVCVUXDDP))]
1734 "VECTOR_UNIT_VSX_P (V2DFmode)"
1736 [(set_attr "type" "vecdouble")])
;; Expander: scale V2DF by 2^scale into a temp, then convert to signed V2DI.
1738 (define_expand "vsx_xvcvdpsxds_scale"
1739 [(match_operand:V2DI 0 "vsx_register_operand" "")
1740 (match_operand:V2DF 1 "vsx_register_operand" "")
1741 (match_operand:QI 2 "immediate_operand" "")]
1742 "VECTOR_UNIT_VSX_P (V2DFmode)"
1744 rtx op0 = operands[0];
1745 rtx op1 = operands[1];
1747 int scale = INTVAL (operands[2]);
1752 tmp = gen_reg_rtx (V2DFmode);
1753 rs6000_scale_v2df (tmp, op1, scale);
1755 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
1759 (define_insn "vsx_xvcvdpsxds"
1760 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1761 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1762 UNSPEC_VSX_XVCVDPSXDS))]
1763 "VECTOR_UNIT_VSX_P (V2DFmode)"
1764 "xvcvdpsxds %x0,%x1"
1765 [(set_attr "type" "vecdouble")])
;; Unsigned counterpart: scale then convert V2DF -> unsigned V2DI.
1767 (define_expand "vsx_xvcvdpuxds_scale"
1768 [(match_operand:V2DI 0 "vsx_register_operand" "")
1769 (match_operand:V2DF 1 "vsx_register_operand" "")
1770 (match_operand:QI 2 "immediate_operand" "")]
1771 "VECTOR_UNIT_VSX_P (V2DFmode)"
1773 rtx op0 = operands[0];
1774 rtx op1 = operands[1];
1776 int scale = INTVAL (operands[2]);
1781 tmp = gen_reg_rtx (V2DFmode);
1782 rs6000_scale_v2df (tmp, op1, scale);
1784 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
1788 (define_insn "vsx_xvcvdpuxds"
1789 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1790 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1791 UNSPEC_VSX_XVCVDPUXDS))]
1792 "VECTOR_UNIT_VSX_P (V2DFmode)"
1793 "xvcvdpuxds %x0,%x1"
1794 [(set_attr "type" "vecdouble")])
1796 ;; Convert from 64-bit to 32-bit types
1797 ;; Note, favor the Altivec registers since the usual use of these instructions
1798 ;; is in vector converts and we need to use the Altivec vperm instruction.
;; V2DF -> signed V4SI (xvcvdpsxws).
1800 (define_insn "vsx_xvcvdpsxws"
1801 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1802 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1803 UNSPEC_VSX_CVDPSXWS))]
1804 "VECTOR_UNIT_VSX_P (V2DFmode)"
1805 "xvcvdpsxws %x0,%x1"
1806 [(set_attr "type" "vecdouble")])
;; V2DF -> unsigned V4SI (xvcvdpuxws).
1808 (define_insn "vsx_xvcvdpuxws"
1809 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1810 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1811 UNSPEC_VSX_CVDPUXWS))]
1812 "VECTOR_UNIT_VSX_P (V2DFmode)"
1813 "xvcvdpuxws %x0,%x1"
1814 [(set_attr "type" "vecdouble")])
;; NOTE(review): the output-template lines of the next four patterns
;; (original lines 1821, 1829, 1838, 1846) were lost in extraction.
1816 (define_insn "vsx_xvcvsxdsp"
1817 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
1818 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
1819 UNSPEC_VSX_CVSXDSP))]
1820 "VECTOR_UNIT_VSX_P (V2DFmode)"
1822 [(set_attr "type" "vecfloat")])
1824 (define_insn "vsx_xvcvuxdsp"
1825 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
1826 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
1827 UNSPEC_VSX_CVUXDSP))]
1828 "VECTOR_UNIT_VSX_P (V2DFmode)"
1830 [(set_attr "type" "vecdouble")])
1832 ;; Convert from 32-bit to 64-bit types
1833 (define_insn "vsx_xvcvsxwdp"
1834 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1835 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1836 UNSPEC_VSX_CVSXWDP))]
1837 "VECTOR_UNIT_VSX_P (V2DFmode)"
1839 [(set_attr "type" "vecdouble")])
1841 (define_insn "vsx_xvcvuxwdp"
1842 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1843 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1844 UNSPEC_VSX_CVUXWDP))]
1845 "VECTOR_UNIT_VSX_P (V2DFmode)"
1847 [(set_attr "type" "vecdouble")])
;; V4SF -> signed V2DI (xvcvspsxds).
1849 (define_insn "vsx_xvcvspsxds"
1850 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1851 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1852 UNSPEC_VSX_CVSPSXDS))]
1853 "VECTOR_UNIT_VSX_P (V2DFmode)"
1854 "xvcvspsxds %x0,%x1"
1855 [(set_attr "type" "vecdouble")])
;; V4SF -> unsigned V2DI (xvcvspuxds).
1857 (define_insn "vsx_xvcvspuxds"
1858 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1859 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1860 UNSPEC_VSX_CVSPUXDS))]
1861 "VECTOR_UNIT_VSX_P (V2DFmode)"
1862 "xvcvspuxds %x0,%x1"
1863 [(set_attr "type" "vecdouble")])
1865 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
1866 ;; since the xvrdpiz instruction does not truncate the value if the floating
1867 ;; point value is < LONG_MIN or > LONG_MAX.
;; NOTE(review): lines 1870-1871 (the float/fix RTL) and 1876 (the output
;; template) were lost in extraction; code left byte-identical.
1868 (define_insn "*vsx_float_fix_v2df2"
1869 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1872 (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
1873 "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
1874 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
1875 && !flag_trapping_math && TARGET_FRIZ"
1877 [(set_attr "type" "vecdouble")
1878 (set_attr "fp_type" "fp_addsub_d")])
1881 ;; Permute operations
1883 ;; Build a V2DF/V2DI vector from two scalars
;; Endian-aware: xxpermdi operand order is swapped for little endian so that
;; element 0 of the vector matches operand 1 in GCC's element numbering.
;; NOTE(review): lines 1886 (vec_concat), 1890/1893/1895 (braces/else) were
;; lost in extraction.
1884 (define_insn "vsx_concat_<mode>"
1885 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1887 (match_operand:<VS_scalar> 1 "vsx_register_operand" "<VS_64reg>,<VSa>")
1888 (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")))]
1889 "VECTOR_MEM_VSX_P (<MODE>mode)"
1891 if (BYTES_BIG_ENDIAN)
1892 return "xxpermdi %x0,%x1,%x2,0";
1894 return "xxpermdi %x0,%x2,%x1,0";
1896 [(set_attr "type" "vecperm")])
1898 ;; Special purpose concat using xxpermdi to glue two single precision values
1899 ;; together, relying on the fact that internally scalar floats are represented
1900 ;; as doubles. This is used to initialize a V4SF vector with 4 floats
1901 (define_insn "vsx_concat_v2sf"
1902 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1904 [(match_operand:SF 1 "vsx_register_operand" "f,f")
1905 (match_operand:SF 2 "vsx_register_operand" "f,f")]
1906 UNSPEC_VSX_CONCAT))]
1907 "VECTOR_MEM_VSX_P (V2DFmode)"
1909 if (BYTES_BIG_ENDIAN)
1910 return "xxpermdi %x0,%x1,%x2,0";
1912 return "xxpermdi %x0,%x2,%x1,0";
1914 [(set_attr "type" "vecperm")])
1916 ;; xxpermdi for little endian loads and stores. We need several of
1917 ;; these since the form of the PARALLEL differs by mode.
;; Doubleword swap within a register (xxpermdi ...,2), 2-element modes.
;; NOTE(review): line 1920 (vec_select) missing from extraction.
1918 (define_insn "*vsx_xxpermdi2_le_<mode>"
1919 [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
1921 (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
1922 (parallel [(const_int 1) (const_int 0)])))]
1923 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1924 "xxpermdi %x0,%x1,%x1,2"
1925 [(set_attr "type" "vecperm")])
;; Same swap expressed for 4-element (32-bit) modes.
1927 (define_insn "*vsx_xxpermdi4_le_<mode>"
1928 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
1930 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
1931 (parallel [(const_int 2) (const_int 3)
1932 (const_int 0) (const_int 1)])))]
1933 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1934 "xxpermdi %x0,%x1,%x1,2"
1935 [(set_attr "type" "vecperm")])
;; Same swap for V8HI.
1937 (define_insn "*vsx_xxpermdi8_le_V8HI"
1938 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1940 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1941 (parallel [(const_int 4) (const_int 5)
1942 (const_int 6) (const_int 7)
1943 (const_int 0) (const_int 1)
1944 (const_int 2) (const_int 3)])))]
1945 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
1946 "xxpermdi %x0,%x1,%x1,2"
1947 [(set_attr "type" "vecperm")])
;; Same swap for V16QI.
1949 (define_insn "*vsx_xxpermdi16_le_V16QI"
1950 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1952 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1953 (parallel [(const_int 8) (const_int 9)
1954 (const_int 10) (const_int 11)
1955 (const_int 12) (const_int 13)
1956 (const_int 14) (const_int 15)
1957 (const_int 0) (const_int 1)
1958 (const_int 2) (const_int 3)
1959 (const_int 4) (const_int 5)
1960 (const_int 6) (const_int 7)])))]
1961 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
1962 "xxpermdi %x0,%x1,%x1,2"
1963 [(set_attr "type" "vecperm")])
1965 ;; lxvd2x for little endian loads. We need several of
1966 ;; these since the form of the PARALLEL differs by mode.
;; NOTE(review): each pattern below is missing its vec_select line and its
;; "lxvd2x %x0,%y1" output template line (1969/1973, 1978/1983, 1988/1995,
;; 2000/2011 in the embedded numbering); code left byte-identical.
;; All are disabled for ISA 3.0 (!TARGET_P9_VECTOR), which has lxvx instead.
1967 (define_insn "*vsx_lxvd2x2_le_<mode>"
1968 [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
1970 (match_operand:VSX_LE 1 "memory_operand" "Z")
1971 (parallel [(const_int 1) (const_int 0)])))]
1972 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
1974 [(set_attr "type" "vecload")])
1976 (define_insn "*vsx_lxvd2x4_le_<mode>"
1977 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
1979 (match_operand:VSX_W 1 "memory_operand" "Z")
1980 (parallel [(const_int 2) (const_int 3)
1981 (const_int 0) (const_int 1)])))]
1982 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
1984 [(set_attr "type" "vecload")])
1986 (define_insn "*vsx_lxvd2x8_le_V8HI"
1987 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1989 (match_operand:V8HI 1 "memory_operand" "Z")
1990 (parallel [(const_int 4) (const_int 5)
1991 (const_int 6) (const_int 7)
1992 (const_int 0) (const_int 1)
1993 (const_int 2) (const_int 3)])))]
1994 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
1996 [(set_attr "type" "vecload")])
1998 (define_insn "*vsx_lxvd2x16_le_V16QI"
1999 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2001 (match_operand:V16QI 1 "memory_operand" "Z")
2002 (parallel [(const_int 8) (const_int 9)
2003 (const_int 10) (const_int 11)
2004 (const_int 12) (const_int 13)
2005 (const_int 14) (const_int 15)
2006 (const_int 0) (const_int 1)
2007 (const_int 2) (const_int 3)
2008 (const_int 4) (const_int 5)
2009 (const_int 6) (const_int 7)])))]
2010 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
2012 [(set_attr "type" "vecload")])
2014 ;; stxvd2x for little endian stores. We need several of
2015 ;; these since the form of the PARALLEL differs by mode.
;; NOTE(review): mirror of the lxvd2x block above -- each pattern is missing
;; its vec_select line and its "stxvd2x %x1,%y0" template line in this
;; extraction; code left byte-identical.
2016 (define_insn "*vsx_stxvd2x2_le_<mode>"
2017 [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
2019 (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
2020 (parallel [(const_int 1) (const_int 0)])))]
2021 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2023 [(set_attr "type" "vecstore")])
2025 (define_insn "*vsx_stxvd2x4_le_<mode>"
2026 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
2028 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
2029 (parallel [(const_int 2) (const_int 3)
2030 (const_int 0) (const_int 1)])))]
2031 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2033 [(set_attr "type" "vecstore")])
2035 (define_insn "*vsx_stxvd2x8_le_V8HI"
2036 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
2038 (match_operand:V8HI 1 "vsx_register_operand" "wa")
2039 (parallel [(const_int 4) (const_int 5)
2040 (const_int 6) (const_int 7)
2041 (const_int 0) (const_int 1)
2042 (const_int 2) (const_int 3)])))]
2043 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
2045 [(set_attr "type" "vecstore")])
2047 (define_insn "*vsx_stxvd2x16_le_V16QI"
2048 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
2050 (match_operand:V16QI 1 "vsx_register_operand" "wa")
2051 (parallel [(const_int 8) (const_int 9)
2052 (const_int 10) (const_int 11)
2053 (const_int 12) (const_int 13)
2054 (const_int 14) (const_int 15)
2055 (const_int 0) (const_int 1)
2056 (const_int 2) (const_int 3)
2057 (const_int 4) (const_int 5)
2058 (const_int 6) (const_int 7)])))]
2059 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
2061 [(set_attr "type" "vecstore")])
2063 ;; Convert a TImode value into V1TImode
;; Expander: only element index 0 is valid for a 1-element vector; the body
;; checks operands[3] against const0_rtx (failure path lines 2072-2073 and
;; the DONE lines were lost in extraction).
2064 (define_expand "vsx_set_v1ti"
2065 [(match_operand:V1TI 0 "nonimmediate_operand" "")
2066 (match_operand:V1TI 1 "nonimmediate_operand" "")
2067 (match_operand:TI 2 "input_operand" "")
2068 (match_operand:QI 3 "u5bit_cint_operand" "")]
2069 "VECTOR_MEM_VSX_P (V1TImode)"
2071 if (operands[3] != const0_rtx)
2074 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]))
2078 ;; Set the element of a V2DI/VD2F mode
;; Endian-aware set-element via xxpermdi; idx_first is the vector element
;; that sits in the "first" doubleword for the current endianness.
;; NOTE(review): lines 2081 (unspec wrapper), 2085, 2087, 2093-2095 missing.
2079 (define_insn "vsx_set_<mode>"
2080 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>")
2082 [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>")
2083 (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")
2084 (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
2086 "VECTOR_MEM_VSX_P (<MODE>mode)"
2088 int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
2089 if (INTVAL (operands[3]) == idx_first)
2090 return \"xxpermdi %x0,%x2,%x1,1\";
2091 else if (INTVAL (operands[3]) == 1 - idx_first)
2092 return \"xxpermdi %x0,%x1,%x2,0\";
2096 [(set_attr "type" "vecperm")])
2098 ;; Extract a DF/DI element from V2DF/V2DI
2099 ;; Optimize cases were we can do a simple or direct move.
2100 ;; Or see if we can avoid doing the move at all
2102 ;; There are some unresolved problems with reload that show up if an Altivec
2103 ;; register was picked. Limit the scalar value to FPRs for now.
;; Output chosen by where the operands landed: no-op when source and dest
;; are the same register, mfvsrd/mfvsrld for VSX->GPR direct moves, xxlor
;; for VSX->VSX element 0, xxpermdi otherwise.
;; NOTE(review): many interior lines are missing from this extraction
;; (constraint alternatives at 2107-2108/2112-2113, branches at 2121-2122,
;; 2125/2127/2130/2134/2136-2137, 2140-2144, 2148/2150/2153, 2156-2160);
;; code left byte-identical -- consult the original vsx.md before editing.
2105 (define_insn "vsx_extract_<mode>"
2106 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand"
2109 (vec_select:<VS_scalar>
2110 (match_operand:VSX_D 1 "gpc_reg_operand"
2111 "<VSa>, <VSa>, <VSa>, <VSa>")
2114 [(match_operand:QI 2 "const_0_to_1_operand"
2115 "wD, wD, wL, n")])))]
2116 "VECTOR_MEM_VSX_P (<MODE>mode)"
2118 int element = INTVAL (operands[2]);
2119 int op0_regno = REGNO (operands[0]);
2120 int op1_regno = REGNO (operands[1]);
2123 gcc_assert (IN_RANGE (element, 0, 1));
2124 gcc_assert (VSX_REGNO_P (op1_regno));
2126 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
2128 if (op0_regno == op1_regno)
2129 return ASM_COMMENT_START " vec_extract to same register";
2131 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
2132 && TARGET_POWERPC64)
2133 return "mfvsrd %0,%x1";
2135 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
2138 else if (VSX_REGNO_P (op0_regno))
2139 return "xxlor %x0,%x1,%x1";
2145 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
2146 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE
2147 return "mfvsrdl %0,%x1";
2149 else if (VSX_REGNO_P (op0_regno))
2151 fldDM = element << 1;
2152 if (!BYTES_BIG_ENDIAN)
2154 operands[3] = GEN_INT (fldDM);
2155 return "xxpermdi %x0,%x1,%x1,%3";
2161 [(set_attr "type" "vecsimple,mftgpr,mftgpr,vecperm")])
2163 ;; Optimize extracting a single scalar element from memory if the scalar is in
2164 ;; the correct location to use a single load.
;; Only element 0 is matched, so the scalar coincides with the start of the
;; vector in memory and a plain FP/GPR load suffices.
2165 (define_insn "*vsx_extract_<mode>_load"
2166 [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,wv,wr")
2167 (vec_select:<VS_scalar>
2168 (match_operand:VSX_D 1 "memory_operand" "m,Z,m")
2169 (parallel [(const_int 0)])))]
2170 "VECTOR_MEM_VSX_P (<MODE>mode)"
;; NOTE(review): the multi-alternative output template is not visible in
;; this excerpt; code left byte-identical.
2175 [(set_attr "type" "fpload,fpload,load")
2176 (set_attr "length" "4")])
2178 ;; Optimize storing a single scalar element that is the right location to
;; use a single store instruction (element selected by the wD constraint is
;; the one occupying the scalar doubleword of the register).
2180 (define_insn "*vsx_extract_<mode>_store"
2181 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,?Z")
2182 (vec_select:<VS_scalar>
2183 (match_operand:VSX_D 1 "register_operand" "d,wd,<VSa>")
2184 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
2185 "VECTOR_MEM_VSX_P (<MODE>mode)"
;; NOTE(review): the output template is not visible in this excerpt.
2190 [(set_attr "type" "fpstore")
2191 (set_attr "length" "4")])
2193 ;; Extract a SF element from V4SF
;; Splits into: rotate the requested word into position with xxsldwi, then
;; convert the scalar with xscvspdp.  Element 0 (constraint "O") needs no
;; rotate and no scratch.
2194 (define_insn_and_split "vsx_extract_v4sf"
2195 [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
2197 (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2198 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "O,i")])))
2199 (clobber (match_scratch:V4SF 3 "=X,0"))]
2200 "VECTOR_UNIT_VSX_P (V4SFmode)"
2208 rtx op0 = operands[0];
2209 rtx op1 = operands[1];
2210 rtx op2 = operands[2];
2211 rtx op3 = operands[3];
/* Map the element index to big-endian word numbering for xxsldwi.  */
2213 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
2219 if (GET_CODE (op3) == SCRATCH)
2220 op3 = gen_reg_rtx (V4SFmode);
2221 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
/* NOTE(review): 'tmp' is defined on lines elided from this excerpt;
   presumably op1 when ele == 0, else op3 -- confirm in full source.  */
2224 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
2227 [(set_attr "length" "4,8")
2228 (set_attr "type" "fp")])
2230 ;; Expand the builtin form of xxpermdi to canonical rtl.
;; The 2-bit mask (operand 3) selects one doubleword from each input; modes
;; other than V2DF/V2DI are punned to V2DI so a single pair of insn patterns
;; covers the whole VSX_L iterator.
2231 (define_expand "vsx_xxpermdi_<mode>"
2232 [(match_operand:VSX_L 0 "vsx_register_operand" "")
2233 (match_operand:VSX_L 1 "vsx_register_operand" "")
2234 (match_operand:VSX_L 2 "vsx_register_operand" "")
2235 (match_operand:QI 3 "u5bit_cint_operand" "")]
2236 "VECTOR_MEM_VSX_P (<MODE>mode)"
2238 rtx target = operands[0];
2239 rtx op0 = operands[1];
2240 rtx op1 = operands[2];
2241 int mask = INTVAL (operands[3]);
;; perm0 indexes op0 (elements 0-1); perm1 indexes op1 (elements 2-3 of the
;; concatenation).
2242 rtx perm0 = GEN_INT ((mask >> 1) & 1);
2243 rtx perm1 = GEN_INT ((mask & 1) + 2);
2244 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
2246 if (<MODE>mode == V2DFmode)
2247 gen = gen_vsx_xxpermdi2_v2df_1;
2250 gen = gen_vsx_xxpermdi2_v2di_1;
2251 if (<MODE>mode != V2DImode)
2253 target = gen_lowpart (V2DImode, target);
2254 op0 = gen_lowpart (V2DImode, op0);
2255 op1 = gen_lowpart (V2DImode, op1);
2258 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
2259 transformation we don't want; it is necessary for
2260 rs6000_expand_vec_perm_const_1 but not for this use.  So we
2261 prepare for that by reversing the transformation here.  */
2262 if (BYTES_BIG_ENDIAN)
2263 emit_insn (gen (target, op0, op1, perm0, perm1));
2266 rtx p0 = GEN_INT (3 - INTVAL (perm1));
2267 rtx p1 = GEN_INT (3 - INTVAL (perm0));
2268 emit_insn (gen (target, op1, op0, p0, p1));
;; Canonical doubleword permute: select elements 3/4 from the concatenation
;; of operands 1 and 2 and emit the matching xxpermdi, compensating for
;; little-endian register element ordering.
2273 (define_insn "vsx_xxpermdi2_<mode>_1"
2274 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
2276 (vec_concat:<VS_double>
2277 (match_operand:VSX_D 1 "vsx_register_operand" "wd")
2278 (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
2279 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2280 (match_operand 4 "const_2_to_3_operand" "")])))]
2281 "VECTOR_MEM_VSX_P (<MODE>mode)"
2285 /* For little endian, swap operands and invert/swap selectors
2286 to get the correct xxpermdi.  The operand swap sets up the
2287 inputs as a little endian array.  The selectors are swapped
2288 because they are defined to use big endian ordering.  The
2289 selectors are inverted to get the correct doublewords for
2290 little endian ordering.  */
2291 if (BYTES_BIG_ENDIAN)
2293 op3 = INTVAL (operands[3]);
2294 op4 = INTVAL (operands[4]);
2298 op3 = 3 - INTVAL (operands[4]);
2299 op4 = 3 - INTVAL (operands[3]);
/* Fold the two selectors into the 2-bit DM immediate of xxpermdi.  */
2302 mask = (op3 << 1) | (op4 - 2);
2303 operands[3] = GEN_INT (mask);
2305 if (BYTES_BIG_ENDIAN)
2306 return "xxpermdi %x0,%x1,%x2,%3";
2308 return "xxpermdi %x0,%x2,%x1,%3";
2310 [(set_attr "type" "vecperm")])
;; Standard-named expander for constant vector permutes; defers entirely to
;; rs6000_expand_vec_perm_const, failing the expand if it cannot handle the
;; selector.
2312 (define_expand "vec_perm_const<mode>"
2313 [(match_operand:VSX_D 0 "vsx_register_operand" "")
2314 (match_operand:VSX_D 1 "vsx_register_operand" "")
2315 (match_operand:VSX_D 2 "vsx_register_operand" "")
2316 (match_operand:V2DI 3 "" "")]
2317 "VECTOR_MEM_VSX_P (<MODE>mode)"
2319 if (rs6000_expand_vec_perm_const (operands))
;; NOTE(review): the DONE/FAIL arms are on lines elided from this excerpt.
2325 ;; Expanders for builtins
;; vec_mergel: build a vector from the low (odd-numbered) doublewords of the
;; two inputs.  The -maltivec=be case on LE swaps operands and selectors so
;; the result matches big-endian element ordering.
2326 (define_expand "vsx_mergel_<mode>"
2327 [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
2328 (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
2329 (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
2330 "VECTOR_MEM_VSX_P (<MODE>mode)"
2335 /* Special handling for LE with -maltivec=be.  */
2336 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
2338 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
2339 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
2343 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
2344 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
2347 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
2348 emit_insn (gen_rtx_SET (operands[0], x));
;; vec_mergeh: build a vector from the high (even-numbered) doublewords of
;; the two inputs; mirror image of vsx_mergel_<mode> above, with the same
;; -maltivec=be swap on little endian.
2352 (define_expand "vsx_mergeh_<mode>"
2353 [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
2354 (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
2355 (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
2356 "VECTOR_MEM_VSX_P (<MODE>mode)"
2361 /* Special handling for LE with -maltivec=be.  */
2362 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
2364 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
2365 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
2369 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
2370 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
2373 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
2374 emit_insn (gen_rtx_SET (operands[0], x));
;; V2DF/V2DI splat: duplicate a 64-bit scalar into both doublewords, from a
;; VSX register (xxpermdi), from memory, or from a GPR.
2379 (define_insn "vsx_splat_<mode>"
2380 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>,<VSa>,we")
2381 (vec_duplicate:VSX_D
2382 (match_operand:<VS_scalar> 1 "splat_input_operand" "<VS_64reg>,Z,b")))]
2383 "VECTOR_MEM_VSX_P (<MODE>mode)"
2385 xxpermdi %x0,%x1,%x1,0
;; NOTE(review): the templates for the memory and GPR alternatives are on
;; lines elided from this excerpt.
2388 [(set_attr "type" "vecperm,vecload,mftgpr")])
2390 ;; V4SI splat (ISA 3.0)
2391 ;; When SI's are allowed in VSX registers, add XXSPLTW support
;; Expander over VSX_W (V4SF/V4SI): legitimize the splat input (address for
;; memory, register otherwise) before matching the *_internal insns below.
2392 (define_expand "vsx_splat_<mode>"
2393 [(set (match_operand:VSX_W 0 "vsx_register_operand" "")
2394 (vec_duplicate:VSX_W
2395 (match_operand:<VS_scalar> 1 "splat_input_operand" "")))]
;; NOTE(review): the insn condition string is on a line elided from this
;; excerpt.
2398 if (MEM_P (operands[1]))
2399 operands[1] = rs6000_address_for_fpconvert (operands[1]);
2400 else if (!REG_P (operands[1]))
2401 operands[1] = force_reg (<VS_scalar>mode, operands[1]);
;; ISA 3.0 V4SI splat from a GPR (mtvsrws) or from memory (lxvwsx).
;; NOTE(review): condition string and output templates are on lines elided
;; from this excerpt.
2404 (define_insn "*vsx_splat_v4si_internal"
2405 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
2407 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
2412 [(set_attr "type" "mftgpr,vecload")])
2414 ;; V4SF splat (ISA 3.0)
;; When the input is already in a VSX register, split after reload into a
;; CVDPSPN (single -> splat-ready word) followed by a word splat of element 0.
2415 (define_insn_and_split "*vsx_splat_v4sf_internal"
2416 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
2418 (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
2424 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
2426 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
2429 (vec_select:SF (match_dup 0)
2430 (parallel [(const_int 0)]))))]
2432 [(set_attr "type" "vecload,vecperm,mftgpr")
2433 (set_attr "length" "4,8,4")])
2435 ;; V4SF/V4SI splat from a vector element
;; Duplicates word <op2> of operand 1 into all four words with xxspltw,
;; converting the element number to big-endian word numbering on LE.
2436 (define_insn "vsx_xxspltw_<mode>"
2437 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2438 (vec_duplicate:VSX_W
2439 (vec_select:<VS_scalar>
2440 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2442 [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
2443 "VECTOR_MEM_VSX_P (<MODE>mode)"
2445 if (!BYTES_BIG_ENDIAN)
2446 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
2448 return "xxspltw %x0,%x1,%2";
2450 [(set_attr "type" "vecperm")])
;; "Direct" xxspltw variant: the caller supplies the raw big-endian word
;; index, so no endian adjustment is applied (unlike vsx_xxspltw_<mode>).
2452 (define_insn "vsx_xxspltw_<mode>_direct"
2453 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2454 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2455 (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
2456 UNSPEC_VSX_XXSPLTW))]
2457 "VECTOR_MEM_VSX_P (<MODE>mode)"
2458 "xxspltw %x0,%x1,%2"
2459 [(set_attr "type" "vecperm")])
2461 ;; V2DF/V2DI splat for use by vec_splat builtin
;; Duplicates doubleword <op2> of operand 1 into both doublewords; DM
;; immediate 0 replicates the high doubleword, 3 the low one, with the
;; element number interpreted per the active element ordering.
2462 (define_insn "vsx_xxspltd_<mode>"
2463 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2464 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
2465 (match_operand:QI 2 "u5bit_cint_operand" "i")]
2466 UNSPEC_VSX_XXSPLTD))]
2467 "VECTOR_MEM_VSX_P (<MODE>mode)"
2469 if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
2470 || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
2471 return "xxpermdi %x0,%x1,%x1,0";
2473 return "xxpermdi %x0,%x1,%x1,3";
2475 [(set_attr "type" "vecperm")])
2477 ;; V4SF/V4SI interleave
;; Merge-high: interleave words 0/1 of the two inputs.  On LE the same
;; result is produced by xxmrglw with the operands swapped.
2478 (define_insn "vsx_xxmrghw_<mode>"
2479 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2481 (vec_concat:<VS_double>
2482 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2483 (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
2484 (parallel [(const_int 0) (const_int 4)
2485 (const_int 1) (const_int 5)])))]
2486 "VECTOR_MEM_VSX_P (<MODE>mode)"
2488 if (BYTES_BIG_ENDIAN)
2489 return "xxmrghw %x0,%x1,%x2";
2491 return "xxmrglw %x0,%x2,%x1";
2493 [(set_attr "type" "vecperm")])
;; Merge-low: interleave words 2/3 of the two inputs; the endian-swapped
;; mirror of vsx_xxmrghw_<mode> above.
2495 (define_insn "vsx_xxmrglw_<mode>"
2496 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2498 (vec_concat:<VS_double>
2499 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2500 (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
2501 (parallel [(const_int 2) (const_int 6)
2502 (const_int 3) (const_int 7)])))]
2503 "VECTOR_MEM_VSX_P (<MODE>mode)"
2505 if (BYTES_BIG_ENDIAN)
2506 return "xxmrglw %x0,%x1,%x2";
2508 return "xxmrghw %x0,%x2,%x1";
2510 [(set_attr "type" "vecperm")])
2512 ;; Shift left double by word immediate
;; Concatenated shift of operands 1:2 left by <op3> words; also the building
;; block for the reduction splitters below.
2513 (define_insn "vsx_xxsldwi_<mode>"
2514 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
2515 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
2516 (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
2517 (match_operand:QI 3 "u5bit_cint_operand" "i")]
2519 "VECTOR_MEM_VSX_P (<MODE>mode)"
2520 "xxsldwi %x0,%x1,%x2,%3"
2521 [(set_attr "type" "vecperm")])
2524 ;; Vector reduction insns and splitters
;; V2DF reduction: rotate the two doublewords together with xxsldwi, then a
;; single vector op combines element 1 with element 0 of the original.
2526 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
2527 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
2531 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
2532 (parallel [(const_int 1)]))
2535 (parallel [(const_int 0)])))
2537 (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
2538 "VECTOR_UNIT_VSX_P (V2DFmode)"
/* Use a fresh pseudo before reload; afterwards fall back to the allocated
   scratch (the alternative arm is on a line elided from this excerpt).  */
2544 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
2545 ? gen_reg_rtx (V2DFmode)
2547 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
2548 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
2551 [(set_attr "length" "8")
2552 (set_attr "type" "veccomplex")])
;; V4SF reduction: log2(4) = 2 shift+combine steps (xxsldwi by 2 words, op,
;; xxsldwi by 3 words, op) leaving the reduced value replicated in op0.
2554 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
2555 [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
2557 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
2558 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
2559 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
2560 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
2561 "VECTOR_UNIT_VSX_P (V4SFmode)"
2567 rtx op0 = operands[0];
2568 rtx op1 = operands[1];
2569 rtx tmp2, tmp3, tmp4;
/* Before reload, take fresh pseudos; the post-reload arm (using the
   clobbered scratches) is on lines elided from this excerpt.  */
2571 if (can_create_pseudo_p ())
2573 tmp2 = gen_reg_rtx (V4SFmode);
2574 tmp3 = gen_reg_rtx (V4SFmode);
2575 tmp4 = gen_reg_rtx (V4SFmode);
2584 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
2585 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
2586 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
2587 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
2590 [(set_attr "length" "16")
2591 (set_attr "type" "veccomplex")])
2593 ;; Combiner patterns with the vector reduction patterns that knows we can get
2594 ;; to the top element of the V2DF array without doing an extract.
2596 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
2597 [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
2602 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
2603 (parallel [(const_int 1)]))
2606 (parallel [(const_int 0)])))
2608 (parallel [(const_int 1)])))
2609 (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
2610 "VECTOR_UNIT_VSX_P (V2DFmode)"
/* The high doubleword is directly addressable as a DF register; only the
   low element needs the vsx_extract.  */
2616 rtx hi = gen_highpart (DFmode, operands[1]);
2617 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
2618 ? gen_reg_rtx (DFmode)
2621 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
2622 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
2625 [(set_attr "length" "8")
2626 (set_attr "type" "veccomplex")])
;; Scalar-result V4SF reduction: the same two shift+combine steps as the
;; vector form, followed by xscvspdp to deliver element 3 as an SF scalar.
2628 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
2629 [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
2632 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
2633 (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
2634 (parallel [(const_int 3)])))
2635 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
2636 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
2637 (clobber (match_scratch:V4SF 4 "=0,0"))]
2638 "VECTOR_UNIT_VSX_P (V4SFmode)"
2644 rtx op0 = operands[0];
2645 rtx op1 = operands[1];
2646 rtx tmp2, tmp3, tmp4, tmp5;
/* Fresh pseudos before reload; the post-reload arm is on lines elided from
   this excerpt.  */
2648 if (can_create_pseudo_p ())
2650 tmp2 = gen_reg_rtx (V4SFmode);
2651 tmp3 = gen_reg_rtx (V4SFmode);
2652 tmp4 = gen_reg_rtx (V4SFmode);
2653 tmp5 = gen_reg_rtx (V4SFmode);
2663 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
2664 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
2665 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
2666 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
2667 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
2670 [(set_attr "length" "20")
2671 (set_attr "type" "veccomplex")])
2674 ;; Power8 Vector fusion.  The fused ops must be physically adjacent.
;; Fuse "li reg,imm" with a following indexed vector load (reg as base).
;; NOTE(review): the opening (define_peephole ...) line is elided from this
;; excerpt; code left byte-identical.
2676 [(set (match_operand:P 0 "base_reg_operand" "")
2677 (match_operand:P 1 "short_cint_operand" ""))
2678 (set (match_operand:VSX_M 2 "vsx_register_operand" "")
2679 (mem:VSX_M (plus:P (match_dup 0)
2680 (match_operand:P 3 "int_reg_operand" ""))))]
2681 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
2682 "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
2683 [(set_attr "length" "8")
2684 (set_attr "type" "vecload")])
;; Same fusion with the plus operands commuted (immediate reg as index).
;; NOTE(review): the opening (define_peephole ...) line and the closing of
;; the mem:VSX_M plus are elided from this excerpt; code left byte-identical.
2687 [(set (match_operand:P 0 "base_reg_operand" "")
2688 (match_operand:P 1 "short_cint_operand" ""))
2689 (set (match_operand:VSX_M 2 "vsx_register_operand" "")
2690 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand" "")
2692 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
2693 "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
2694 [(set_attr "length" "8")
2695 (set_attr "type" "vecload")])
2698 ;; ISA 3.0 vector extend sign support
;; Sign-extend the byte elements of a V16QI into V4SI/V2DI (vextsb2w/2d);
;; condition string and output template are elided from this excerpt.
2700 (define_insn "vsx_sign_extend_qi_<mode>"
2701 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
2703 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2704 UNSPEC_VSX_SIGN_EXTEND))]
2707 [(set_attr "type" "vecsimple")])
;; Sign-extend the halfword elements of a V8HI into V4SI/V2DI; condition
;; string and output template are elided from this excerpt.
2709 (define_insn "vsx_sign_extend_hi_<mode>"
2710 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
2712 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
2713 UNSPEC_VSX_SIGN_EXTEND))]
2716 [(set_attr "type" "vecsimple")])
;; Sign-extend the word elements of a V4SI into V2DI; condition string and
;; output template are elided from this excerpt.
2718 (define_insn "*vsx_sign_extend_si_v2di"
2719 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
2720 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
2721 UNSPEC_VSX_SIGN_EXTEND))]
2724 [(set_attr "type" "vecsimple")])
2727 ;; ISA 3.0 memory operations
;; Zero-extending scalar byte/halfword load into a VSX register
;; (lxsibzx/lxsihzx via the QHI iterator's <wd> attribute).
2728 (define_insn "p9_lxsi<wd>zx"
2729 [(set (match_operand:DI 0 "vsx_register_operand" "=wi")
2730 (unspec:DI [(zero_extend:DI
2731 (match_operand:QHI 1 "indexed_or_indirect_operand" "Z"))]
2734 "lxsi<wd>zx %x0,%y1"
2735 [(set_attr "type" "fpload")])
;; Scalar byte/halfword store from a VSX register, with a GPR alternative;
;; condition string and output templates are elided from this excerpt.
2737 (define_insn "p9_stxsi<wd>x"
2738 [(set (match_operand:QHI 0 "reg_or_indexed_operand" "=r,Z")
2739 (unspec:QHI [(match_operand:DI 1 "vsx_register_operand" "wi,wi")]
2745 [(set_attr "type" "mffgpr,fpstore")])