From f63c1cff2969b0030e227c6a8cf123aaad781349 Mon Sep 17 00:00:00 2001 From: rsandifo Date: Tue, 2 Jan 2018 18:27:15 +0000 Subject: [PATCH] Rework VEC_PERM_EXPR folding This patch reworks the VEC_PERM_EXPR folding so that more of it works for variable-length vectors. E.g. it means that we can now recognise variable-length permutes that reduce to a single vector, or cases in which a variable-length permute only needs one input. There should be no functional change for fixed-length vectors. 2018-01-02 Richard Sandiford gcc/ * selftest.h (selftest::vec_perm_indices_c_tests): Declare. * selftest-run-tests.c (selftest::run_tests): Call it. * vector-builder.h (vector_builder::operator ==): New function. (vector_builder::operator !=): Likewise. * vec-perm-indices.h (vec_perm_indices::series_p): Declare. (vec_perm_indices::all_from_input_p): New function. * vec-perm-indices.c (vec_perm_indices::series_p): Likewise. (test_vec_perm_12, selftest::vec_perm_indices_c_tests): Likewise. * fold-const.c (fold_ternary_loc): Use tree_to_vec_perm_builder instead of reading the VECTOR_CST directly. Detect whether both vector inputs are the same before constructing the vec_perm_indices, and update the number of inputs argument accordingly. Use the utility functions added above. Only construct sel2 if we need to. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@256098 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 16 +++++++ gcc/fold-const.c | 118 +++++++++++++++++------------------------------ gcc/selftest-run-tests.c | 1 + gcc/selftest.h | 1 + gcc/vec-perm-indices.c | 98 +++++++++++++++++++++++++++++++++++++++ gcc/vec-perm-indices.h | 11 +++++ gcc/vector-builder.h | 23 +++++++++ 7 files changed, 192 insertions(+), 76 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c5b62cd3628..9e3face038b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,21 @@ 2018-01-02 Richard Sandiford + * selftest.h (selftest::vec_perm_indices_c_tests): Declare. + * selftest-run-tests.c (selftest::run_tests): Call it. + * vector-builder.h (vector_builder::operator ==): New function. + (vector_builder::operator !=): Likewise. + * vec-perm-indices.h (vec_perm_indices::series_p): Declare. + (vec_perm_indices::all_from_input_p): New function. + * vec-perm-indices.c (vec_perm_indices::series_p): Likewise. + (test_vec_perm_12, selftest::vec_perm_indices_c_tests): Likewise. + * fold-const.c (fold_ternary_loc): Use tree_to_vec_perm_builder + instead of reading the VECTOR_CST directly. Detect whether both + vector inputs are the same before constructing the vec_perm_indices, + and update the number of inputs argument accordingly. Use the + utility functions added above. Only construct sel2 if we need to. + +2018-01-02 Richard Sandiford + * optabs.c (expand_vec_perm_var): Use an explicit encoding for the broadcast of the low byte. (expand_mult_highpart): Use an explicit encoding for the permutes. diff --git a/gcc/fold-const.c b/gcc/fold-const.c index ca0d8fd9319..f28970b2a87 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -11705,99 +11705,65 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, case VEC_PERM_EXPR: if (TREE_CODE (arg2) == VECTOR_CST) { - unsigned int nelts = VECTOR_CST_NELTS (arg2), i, mask, mask2; - bool need_mask_canon = false; - bool need_mask_canon2 = false; - bool all_in_vec0 = true; - bool all_in_vec1 = true; - bool maybe_identity = true; - bool single_arg = (op0 == op1); - bool changed = false; - - mask2 = 2 * nelts - 1; - mask = single_arg ? (nelts - 1) : mask2; - gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type)); - vec_perm_builder sel (nelts, nelts, 1); - vec_perm_builder sel2 (nelts, nelts, 1); - for (i = 0; i < nelts; i++) - { - tree val = VECTOR_CST_ELT (arg2, i); - if (TREE_CODE (val) != INTEGER_CST) - return NULL_TREE; - - /* Make sure that the perm value is in an acceptable - range. */ - wi::tree_to_wide_ref t = wi::to_wide (val); - need_mask_canon |= wi::gtu_p (t, mask); - need_mask_canon2 |= wi::gtu_p (t, mask2); - unsigned int elt = t.to_uhwi () & mask; - unsigned int elt2 = t.to_uhwi () & mask2; - - if (elt < nelts) - all_in_vec1 = false; - else - all_in_vec0 = false; - - if ((elt & (nelts - 1)) != i) - maybe_identity = false; + /* Build a vector of integers from the tree mask. */ + vec_perm_builder builder; + if (!tree_to_vec_perm_builder (&builder, arg2)) + return NULL_TREE; - sel.quick_push (elt); - sel2.quick_push (elt2); - } + /* Create a vec_perm_indices for the integer vector. */ + unsigned int nelts = TYPE_VECTOR_SUBPARTS (type); + bool single_arg = (op0 == op1); + vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts); - if (maybe_identity) - { - if (all_in_vec0) - return op0; - if (all_in_vec1) - return op1; - } + /* Check for cases that fold to OP0 or OP1 in their original + element order. */ + if (sel.series_p (0, 1, 0, 1)) + return op0; + if (sel.series_p (0, 1, nelts, 1)) + return op1; - if (all_in_vec0) - op1 = op0; - else if (all_in_vec1) + if (!single_arg) { - op0 = op1; - for (i = 0; i < nelts; i++) - sel[i] -= nelts; - need_mask_canon = true; + if (sel.all_from_input_p (0)) + op1 = op0; + else if (sel.all_from_input_p (1)) + { + op0 = op1; + sel.rotate_inputs (1); + } } - vec_perm_indices indices (sel, 2, nelts); if ((TREE_CODE (op0) == VECTOR_CST || TREE_CODE (op0) == CONSTRUCTOR) && (TREE_CODE (op1) == VECTOR_CST || TREE_CODE (op1) == CONSTRUCTOR)) { - tree t = fold_vec_perm (type, op0, op1, indices); + tree t = fold_vec_perm (type, op0, op1, sel); if (t != NULL_TREE) return t; } - if (op0 == op1 && !single_arg) - changed = true; - - /* Some targets are deficient and fail to expand a single - argument permutation while still allowing an equivalent - 2-argument version. */ - if (need_mask_canon && arg2 == op2 - && !can_vec_perm_const_p (TYPE_MODE (type), indices, false) - && can_vec_perm_const_p (TYPE_MODE (type), - vec_perm_indices (sel2, 2, nelts), - false)) - { - need_mask_canon = need_mask_canon2; - sel.truncate (0); - sel.splice (sel2); - } + bool changed = (op0 == op1 && !single_arg); - if (need_mask_canon && arg2 == op2) + /* Generate a canonical form of the selector. */ + if (arg2 == op2 && sel.encoding () != builder) { - tree eltype = TREE_TYPE (TREE_TYPE (arg2)); - tree_vector_builder tsel (TREE_TYPE (arg2), nelts, 1); - for (i = 0; i < nelts; i++) - tsel.quick_push (build_int_cst (eltype, sel[i])); - op2 = tsel.build (); + /* Some targets are deficient and fail to expand a single + argument permutation while still allowing an equivalent + 2-argument version. */ + if (sel.ninputs () == 2 + || can_vec_perm_const_p (TYPE_MODE (type), sel, false)) + op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); + else + { + vec_perm_indices sel2 (builder, 2, nelts); + if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) + op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2); + else + /* Not directly supported with either encoding, + so use the preferred form. */ + op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); + } changed = true; } diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c index 63d6e071eca..6af0f02ccbb 100644 --- a/gcc/selftest-run-tests.c +++ b/gcc/selftest-run-tests.c @@ -73,6 +73,7 @@ selftest::run_tests () /* Mid-level data structures. */ input_c_tests (); + vec_perm_indices_c_tests (); tree_c_tests (); gimple_c_tests (); rtl_tests_c_tests (); diff --git a/gcc/selftest.h b/gcc/selftest.h index ab8a7c0c71c..23ca66557c1 100644 --- a/gcc/selftest.h +++ b/gcc/selftest.h @@ -215,6 +215,7 @@ extern void vec_c_tests (); extern void wide_int_cc_tests (); extern void predict_c_tests (); extern void simplify_rtx_c_tests (); +extern void vec_perm_indices_c_tests (); extern int num_passes; diff --git a/gcc/vec-perm-indices.c b/gcc/vec-perm-indices.c index 558ab2efcf2..3eb9c41cad0 100644 --- a/gcc/vec-perm-indices.c +++ b/gcc/vec-perm-indices.c @@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see #include "rtl.h" #include "memmodel.h" #include "emit-rtl.h" +#include "selftest.h" /* Switch to a new permutation vector that selects between NINPUTS vector inputs that have NELTS_PER_INPUT elements each. Take the elements of the @@ -85,6 +86,54 @@ vec_perm_indices::rotate_inputs (int delta) m_encoding[i] = clamp (m_encoding[i] + element_delta); } +/* Return true if index OUT_BASE + I * OUT_STEP selects input + element IN_BASE + I * IN_STEP. */ + +bool +vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step, + element_type in_base, element_type in_step) const +{ + /* Check the base value. */ + if (clamp (m_encoding.elt (out_base)) != clamp (in_base)) + return false; + + unsigned int full_nelts = m_encoding.full_nelts (); + unsigned int npatterns = m_encoding.npatterns (); + + /* Calculate which multiple of OUT_STEP elements we need to get + back to the same pattern. */ + unsigned int cycle_length = least_common_multiple (out_step, npatterns); + + /* Check the steps. */ + in_step = clamp (in_step); + out_base += out_step; + unsigned int limit = 0; + for (;;) + { + /* Succeed if we've checked all the elements in the vector. */ + if (out_base >= full_nelts) + return true; + + if (out_base >= npatterns) + { + /* We've got to the end of the "foreground" values. Check + 2 elements from each pattern in the "background" values. */ + if (limit == 0) + limit = out_base + cycle_length * 2; + else if (out_base >= limit) + return true; + } + + element_type v0 = m_encoding.elt (out_base - out_step); + element_type v1 = m_encoding.elt (out_base); + if (clamp (v1 - v0) != in_step) + return false; + + out_base += out_step; + } + return true; +} + /* Return true if all elements of the permutation vector are in the range [START, START + SIZE). */ @@ -180,3 +229,52 @@ vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices) RTVEC_ELT (v, i) = gen_int_mode (indices[i], GET_MODE_INNER (mode)); return gen_rtx_CONST_VECTOR (mode, v); } + +#if CHECKING_P + +namespace selftest { + +/* Test a 12-element vector. */ + +static void +test_vec_perm_12 (void) +{ + vec_perm_builder builder (12, 12, 1); + for (unsigned int i = 0; i < 4; ++i) + { + builder.quick_push (i * 5); + builder.quick_push (3 + i); + builder.quick_push (2 + 3 * i); + } + vec_perm_indices indices (builder, 1, 12); + ASSERT_TRUE (indices.series_p (0, 3, 0, 5)); + ASSERT_FALSE (indices.series_p (0, 3, 3, 5)); + ASSERT_FALSE (indices.series_p (0, 3, 0, 8)); + ASSERT_TRUE (indices.series_p (1, 3, 3, 1)); + ASSERT_TRUE (indices.series_p (2, 3, 2, 3)); + + ASSERT_TRUE (indices.series_p (0, 4, 0, 4)); + ASSERT_FALSE (indices.series_p (1, 4, 3, 4)); + + ASSERT_TRUE (indices.series_p (0, 6, 0, 10)); + ASSERT_FALSE (indices.series_p (0, 6, 0, 100)); + + ASSERT_FALSE (indices.series_p (1, 10, 3, 7)); + ASSERT_TRUE (indices.series_p (1, 10, 3, 8)); + + ASSERT_TRUE (indices.series_p (0, 12, 0, 10)); + ASSERT_TRUE (indices.series_p (0, 12, 0, 11)); + ASSERT_TRUE (indices.series_p (0, 12, 0, 100)); +} + +/* Run selftests for this file. */ + +void +vec_perm_indices_c_tests () +{ + test_vec_perm_12 (); +} + +} // namespace selftest + +#endif diff --git a/gcc/vec-perm-indices.h b/gcc/vec-perm-indices.h index 1048f558d97..52b65a51e4b 100644 --- a/gcc/vec-perm-indices.h +++ b/gcc/vec-perm-indices.h @@ -77,7 +77,9 @@ public: element_type clamp (element_type) const; element_type operator[] (unsigned int i) const; + bool series_p (unsigned int, unsigned int, element_type, element_type) const; bool all_in_range_p (element_type, element_type) const; + bool all_from_input_p (unsigned int) const; private: vec_perm_indices (const vec_perm_indices &); @@ -134,4 +136,13 @@ vec_perm_indices::operator[] (unsigned int i) const return clamp (m_encoding.elt (i)); } +/* Return true if the permutation vector only selects elements from + input I. */ + +inline bool +vec_perm_indices::all_from_input_p (unsigned int i) const +{ + return all_in_range_p (i * m_nelts_per_input, m_nelts_per_input); +} + #endif diff --git a/gcc/vector-builder.h b/gcc/vector-builder.h index 0edd6f5c092..74e0b7655ad 100644 --- a/gcc/vector-builder.h +++ b/gcc/vector-builder.h @@ -97,6 +97,9 @@ public: bool encoded_full_vector_p () const; T elt (unsigned int) const; + bool operator == (const Derived &) const; + bool operator != (const Derived &x) const { return !operator == (x); } + void finalize (); protected: @@ -168,6 +171,26 @@ vector_builder::new_vector (unsigned int full_nelts, this->truncate (0); } +/* Return true if this vector and OTHER have the same elements and + are encoded in the same way. */ + +template +bool +vector_builder::operator == (const Derived &other) const +{ + if (m_full_nelts != other.m_full_nelts + || m_npatterns != other.m_npatterns + || m_nelts_per_pattern != other.m_nelts_per_pattern) + return false; + + unsigned int nelts = encoded_nelts (); + for (unsigned int i = 0; i < nelts; ++i) + if (!derived ()->equal_p ((*this)[i], other[i])) + return false; + + return true; +} + /* Return the value of vector element I, which might or might not be encoded explicitly. */ -- 2.11.4.GIT