From cfdba70a5e50791d4a5ae2acd8e42d07754bcb15 Mon Sep 17 00:00:00 2001 From: tnfchris Date: Mon, 16 Oct 2017 09:56:41 +0000 Subject: [PATCH] 2017-10-16 Tamar Christina * config/aarch64/aarch64-builtins.c (aarch64_types_quadopu_lane_qualifiers): New. (TYPES_QUADOPU_LANE): New. * config/aarch64/aarch64-simd.md (aarch64_dot): New. (dot_prod, aarch64_dot_lane): New. (aarch64_dot_laneq): New. * config/aarch64/aarch64-simd-builtins.def (sdot, udot): New. (sdot_lane, udot_lane, sdot_laneq, udot_laneq): New. * config/aarch64/iterators.md (sur): Add UNSPEC_SDOT, UNSPEC_UDOT. (Vdottype, DOTPROD): New. (sur): Add SDOT and UDOT. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@253783 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 14 +++++ gcc/config/aarch64/aarch64-builtins.c | 5 ++ gcc/config/aarch64/aarch64-simd-builtins.def | 8 +++ gcc/config/aarch64/aarch64-simd.md | 81 ++++++++++++++++++++++++++++ gcc/config/aarch64/iterators.md | 8 +++ 5 files changed, 116 insertions(+) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f4228ff2385..bf6c3d82907 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,19 @@ 2017-10-16 Tamar Christina + * config/aarch64/aarch64-builtins.c + (aarch64_types_quadopu_lane_qualifiers): New. + (TYPES_QUADOPU_LANE): New. + * config/aarch64/aarch64-simd.md (aarch64_dot): New. + (dot_prod, aarch64_dot_lane): New. + (aarch64_dot_laneq): New. + * config/aarch64/aarch64-simd-builtins.def (sdot, udot): New. + (sdot_lane, udot_lane, sdot_laneq, udot_laneq): New. + * config/aarch64/iterators.md (sur): Add UNSPEC_SDOT, UNSPEC_UDOT. + (Vdottype, DOTPROD): New. + (sur): Add SDOT and UDOT. + +2017-10-16 Tamar Christina + * config/aarch64/aarch64.h (AARCH64_FL_DOTPROD): New. (AARCH64_ISA_DOTPROD, TARGET_DOTPROD): New. * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Add TARGET_DOTPROD. diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 7edf75c52ef..242b2e3dc31 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -168,6 +168,11 @@ aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index }; #define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, + qualifier_unsigned, qualifier_lane_index }; +#define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers) static enum aarch64_type_qualifiers aarch64_types_binop_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index d713d5d8b88..52d01342372 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -205,6 +205,14 @@ BUILTIN_VSDQ_I_DI (BINOP, srshl, 0) BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0) + /* Implemented by aarch64_{_lane}{q}. */ + BUILTIN_VB (TERNOP, sdot, 0) + BUILTIN_VB (TERNOPU, udot, 0) + BUILTIN_VB (QUADOP_LANE, sdot_lane, 0) + BUILTIN_VB (QUADOPU_LANE, udot_lane, 0) + BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0) + BUILTIN_VB (QUADOPU_LANE, udot_laneq, 0) + BUILTIN_VDQ_I (SHIFTIMM, ashr, 3) VAR1 (SHIFTIMM, ashr_simd, 0, di) BUILTIN_VDQ_I (SHIFTIMM, lshr, 3) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 12da8be73e8..49f615cfdbf 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -393,6 +393,87 @@ } ) +;; These instructions map to the __builtins for the Dot Product operations. +(define_insn "aarch64_dot" + [(set (match_operand:VS 0 "register_operand" "=w") + (plus:VS (match_operand:VS 1 "register_operand" "0") + (unspec:VS [(match_operand: 2 "register_operand" "w") + (match_operand: 3 "register_operand" "w")] + DOTPROD)))] + "TARGET_DOTPROD" + "dot\\t%0., %2., %3." + [(set_attr "type" "neon_dot")] +) + +;; These expands map to the Dot Product optab the vectorizer checks for. +;; The auto-vectorizer expects a dot product builtin that also does an +;; accumulation into the provided register. +;; Given the following pattern +;; +;; for (i=0; idot_prod" + [(set (match_operand:VS 0 "register_operand") + (plus:VS (unspec:VS [(match_operand: 1 "register_operand") + (match_operand: 2 "register_operand")] + DOTPROD) + (match_operand:VS 3 "register_operand")))] + "TARGET_DOTPROD" +{ + emit_insn ( + gen_aarch64_dot (operands[3], operands[3], operands[1], + operands[2])); + emit_insn (gen_rtx_SET (operands[0], operands[3])); + DONE; +}) + +;; These instructions map to the __builtins for the Dot Product +;; indexed operations. +(define_insn "aarch64_dot_lane" + [(set (match_operand:VS 0 "register_operand" "=w") + (plus:VS (match_operand:VS 1 "register_operand" "0") + (unspec:VS [(match_operand: 2 "register_operand" "w") + (match_operand:V8QI 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + DOTPROD)))] + "TARGET_DOTPROD" + { + operands[4] + = GEN_INT (ENDIAN_LANE_N (V8QImode, INTVAL (operands[4]))); + return "dot\\t%0., %2., %3.4b[%4]"; + } + [(set_attr "type" "neon_dot")] +) + +(define_insn "aarch64_dot_laneq" + [(set (match_operand:VS 0 "register_operand" "=w") + (plus:VS (match_operand:VS 1 "register_operand" "0") + (unspec:VS [(match_operand: 2 "register_operand" "w") + (match_operand:V16QI 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + DOTPROD)))] + "TARGET_DOTPROD" + { + operands[4] + = GEN_INT (ENDIAN_LANE_N (V16QImode, INTVAL (operands[4]))); + return "dot\\t%0., %2., %3.4b[%4]"; + } + [(set_attr "type" "neon_dot")] +) + (define_expand "copysign3" [(match_operand:VHSDF 0 "register_operand") (match_operand:VHSDF 1 "register_operand") diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 477dc35daf6..48cedbe84a6 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -354,6 +354,8 @@ UNSPEC_SQRDMLSH ; Used in aarch64-simd.md. UNSPEC_FMAXNM ; Used in aarch64-simd.md. UNSPEC_FMINNM ; Used in aarch64-simd.md. + UNSPEC_SDOT ; Used in aarch64-simd.md. + UNSPEC_UDOT ; Used in aarch64-simd.md. ]) ;; ------------------------------------------------------------------ @@ -800,6 +802,10 @@ (define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")]) (define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")]) + +;; Register suffix for DOTPROD input types from the return type. +(define_mode_attr Vdottype [(V2SI "8b") (V4SI "16b")]) + ;; Sum of lengths of instructions needed to move vector registers of a mode. (define_mode_attr insn_count [(OI "8") (CI "12") (XI "16")]) @@ -1029,6 +1035,7 @@ UNSPEC_SHSUB UNSPEC_UHSUB UNSPEC_SRHSUB UNSPEC_URHSUB]) +(define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT]) (define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN UNSPEC_SUBHN UNSPEC_RSUBHN]) @@ -1166,6 +1173,7 @@ (UNSPEC_USHLL "u") (UNSPEC_SSHLL "s") (UNSPEC_URSHL "ur") (UNSPEC_SRSHL "sr") (UNSPEC_UQRSHL "u") (UNSPEC_SQRSHL "s") + (UNSPEC_SDOT "s") (UNSPEC_UDOT "u") ]) (define_int_attr r [(UNSPEC_SQDMULH "") (UNSPEC_SQRDMULH "r") -- 2.11.4.GIT