From 13c247d6f2a75b7e7a11546e897489716bc31506 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 18 Nov 2019 15:27:56 +0000 Subject: [PATCH] Handle VIEW_CONVERT_EXPR for variable-length vectors This patch handles VIEW_CONVERT_EXPRs of variable-length VECTOR_CSTs by adding tree-level versions of native_decode_vector_rtx and simplify_const_vector_subreg. It uses the same code for fixed-length vectors, both to get more coverage and because operating directly on the compressed encoding should be more efficient for longer vectors with a regular pattern. The structure and comments are very similar between the tree and rtx routines. 2019-11-18 Richard Sandiford gcc/ * fold-const.c (native_encode_vector): Turn into a wrapper function, splitting the main code out into... (native_encode_vector_part): ...this new function. (native_decode_vector_tree): New function. (fold_view_convert_vector_encoding): Likewise. (fold_view_convert_expr): Use it for converting VECTOR_CSTs to VECTOR_TYPEs. gcc/testsuite/ * gcc.target/aarch64/sve/acle/general/temporaries_1.c: New test. From-SVN: r278410 --- gcc/ChangeLog | 10 ++ gcc/fold-const.c | 139 +++++++++++++++++++-- gcc/testsuite/ChangeLog | 4 + .../aarch64/sve/acle/general/temporaries_1.c | 70 +++++++++++ 4 files changed, 214 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/temporaries_1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6ca0c57e6c6..932b87a6e5c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,15 @@ 2019-11-18 Richard Sandiford + * fold-const.c (native_encode_vector): Turn into a wrapper function, + splitting the main code out into... + (native_encode_vector_part): ...this new function. + (native_decode_vector_tree): New function. + (fold_view_convert_vector_encoding): Likewise. + (fold_view_convert_expr): Use it for converting VECTOR_CSTs + to VECTOR_TYPEs. 
+ +2019-11-18 Richard Sandiford + * tree-data-ref.c (create_intersect_range_checks_index): If the alias pair describes simple WAW and WAR dependencies, just check whether the first B access overlaps later A accesses. diff --git a/gcc/fold-const.c b/gcc/fold-const.c index b48296ca111..02daacdb36b 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -7715,22 +7715,18 @@ native_encode_complex (const_tree expr, unsigned char *ptr, int len, int off) return rsize + isize; } - -/* Subroutine of native_encode_expr. Encode the VECTOR_CST - specified by EXPR into the buffer PTR of length LEN bytes. - Return the number of bytes placed in the buffer, or zero - upon failure. */ +/* Like native_encode_vector, but only encode the first COUNT elements. + The other arguments are as for native_encode_vector. */ static int -native_encode_vector (const_tree expr, unsigned char *ptr, int len, int off) +native_encode_vector_part (const_tree expr, unsigned char *ptr, int len, + int off, unsigned HOST_WIDE_INT count) { - unsigned HOST_WIDE_INT i, count; + unsigned HOST_WIDE_INT i; int size, offset; tree itype, elem; offset = 0; - if (!VECTOR_CST_NELTS (expr).is_constant (&count)) - return 0; itype = TREE_TYPE (TREE_TYPE (expr)); size = GET_MODE_SIZE (SCALAR_TYPE_MODE (itype)); for (i = 0; i < count; i++) @@ -7754,6 +7750,20 @@ native_encode_vector (const_tree expr, unsigned char *ptr, int len, int off) return offset; } +/* Subroutine of native_encode_expr. Encode the VECTOR_CST + specified by EXPR into the buffer PTR of length LEN bytes. + Return the number of bytes placed in the buffer, or zero + upon failure. */ + +static int +native_encode_vector (const_tree expr, unsigned char *ptr, int len, int off) +{ + unsigned HOST_WIDE_INT count; + if (!VECTOR_CST_NELTS (expr).is_constant (&count)) + return 0; + return native_encode_vector_part (expr, ptr, len, off, count); +} + /* Subroutine of native_encode_expr. 
Encode the STRING_CST specified by EXPR into the buffer PTR of length LEN bytes. @@ -8049,6 +8059,113 @@ can_native_interpret_type_p (tree type) } } +/* Read a vector of type TYPE from the target memory image given by BYTES, + starting at byte FIRST_BYTE. The vector is known to be encodable using + NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each, + and BYTES is known to have enough bytes to supply NPATTERNS * + NELTS_PER_PATTERN vector elements. Each element of BYTES contains + BITS_PER_UNIT bits and the bytes are in target memory order. + + Return the vector on success, otherwise return null. */ + +static tree +native_decode_vector_tree (tree type, vec<unsigned char> bytes, + unsigned int first_byte, unsigned int npatterns, + unsigned int nelts_per_pattern) +{ + tree_vector_builder builder (type, npatterns, nelts_per_pattern); + tree elt_type = TREE_TYPE (type); + unsigned int elt_bits = tree_to_uhwi (TYPE_SIZE (elt_type)); + if (VECTOR_BOOLEAN_TYPE_P (type) && elt_bits <= BITS_PER_UNIT) + { + /* This is the only case in which elements can be smaller than a byte. + Element 0 is always in the lsb of the containing byte. */ + elt_bits = TYPE_PRECISION (elt_type); + for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) + { + unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits; + unsigned int byte_index = bit_index / BITS_PER_UNIT; + unsigned int lsb = bit_index % BITS_PER_UNIT; + builder.quick_push (bytes[byte_index] & (1 << lsb) + ? 
build_all_ones_cst (elt_type) + : build_zero_cst (elt_type)); + } + } + else + { + unsigned int elt_bytes = elt_bits / BITS_PER_UNIT; + for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) + { + tree elt = native_interpret_expr (elt_type, &bytes[first_byte], + elt_bytes); + if (!elt) + return NULL_TREE; + builder.quick_push (elt); + first_byte += elt_bytes; + } + } + return builder.build (); +} + +/* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating + directly on the VECTOR_CST encoding, in a way that works for variable- + length vectors. Return the resulting VECTOR_CST on success or null + on failure. */ + +static tree +fold_view_convert_vector_encoding (tree type, tree expr) +{ + tree expr_type = TREE_TYPE (expr); + poly_uint64 type_bits, expr_bits; + if (!poly_int_tree_p (TYPE_SIZE (type), &type_bits) + || !poly_int_tree_p (TYPE_SIZE (expr_type), &expr_bits)) + return NULL_TREE; + + poly_uint64 type_units = TYPE_VECTOR_SUBPARTS (type); + poly_uint64 expr_units = TYPE_VECTOR_SUBPARTS (expr_type); + unsigned int type_elt_bits = vector_element_size (type_bits, type_units); + unsigned int expr_elt_bits = vector_element_size (expr_bits, expr_units); + + /* We can only preserve the semantics of a stepped pattern if the new + vector element is an integer of the same size. */ + if (VECTOR_CST_STEPPED_P (expr) + && (!INTEGRAL_TYPE_P (type) || type_elt_bits != expr_elt_bits)) + return NULL_TREE; + + /* The number of bits needed to encode one element from every pattern + of the original vector. */ + unsigned int expr_sequence_bits + = VECTOR_CST_NPATTERNS (expr) * expr_elt_bits; + + /* The number of bits needed to encode one element from every pattern + of the result. */ + unsigned int type_sequence_bits + = least_common_multiple (expr_sequence_bits, type_elt_bits); + + /* Don't try to read more bytes than are available, which can happen + for constant-sized vectors if TYPE has larger elements than EXPR_TYPE. 
+ The general VIEW_CONVERT handling can cope with that case, so there's + no point complicating things here. */ + unsigned int nelts_per_pattern = VECTOR_CST_NELTS_PER_PATTERN (expr); + unsigned int buffer_bytes = CEIL (nelts_per_pattern * type_sequence_bits, + BITS_PER_UNIT); + unsigned int buffer_bits = buffer_bytes * BITS_PER_UNIT; + if (known_gt (buffer_bits, expr_bits)) + return NULL_TREE; + + /* Get enough bytes of EXPR to form the new encoding. */ + auto_vec<unsigned char, 128> buffer (buffer_bytes); + buffer.quick_grow (buffer_bytes); + if (native_encode_vector_part (expr, buffer.address (), buffer_bytes, 0, + buffer_bits / expr_elt_bits) + != (int) buffer_bytes) + return NULL_TREE; + + /* Reencode the bytes as TYPE. */ + unsigned int type_npatterns = type_sequence_bits / type_elt_bits; + return native_decode_vector_tree (type, buffer, 0, type_npatterns, + nelts_per_pattern); +} /* Fold a VIEW_CONVERT_EXPR of a constant expression EXPR to type TYPE at compile-time. If we're unable to perform the conversion @@ -8065,6 +8182,10 @@ fold_view_convert_expr (tree type, tree expr) if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) return NULL_TREE; + if (VECTOR_TYPE_P (type) && TREE_CODE (expr) == VECTOR_CST) + if (tree res = fold_view_convert_vector_encoding (type, expr)) + return res; + len = native_encode_expr (expr, buffer, sizeof (buffer)); if (len == 0) return NULL_TREE; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 12a0d08238c..033f2e17667 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,9 @@ 2019-11-18 Richard Sandiford + * gcc.target/aarch64/sve/acle/general/temporaries_1.c: New test. + +2019-11-18 Richard Sandiford + * gcc.dg/vect/vect-alias-check-8.c: Expect WAR/WAW checks to be used. * gcc.dg/vect/vect-alias-check-14.c: Likewise. * gcc.dg/vect/vect-alias-check-15.c: Likewise. 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/temporaries_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/temporaries_1.c new file mode 100644 index 00000000000..217131d8c05 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/temporaries_1.c @@ -0,0 +1,70 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <arm_sve.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** test_s8: +** ptrue (p[0-7])\.b, all +** ld1b (z[0-9]+\.b), \1/z, \[x0\] +** add \2, \2, #1 +** st1b \2, \1, \[x1\] +** ret +*/ +void +test_s8 (int8_t *x, int8_t *y) +{ + int8_t tmp1[32], tmp2[32]; + + svbool_t pg = svptrue_b8 (); + svst1 (pg, tmp1, svld1 (pg, x)); + svst1 (pg, tmp2, svadd_x (pg, svld1 (pg, tmp1), 1)); + svst1 (pg, y, svld1 (pg, tmp2)); +} + +/* +** test_s32_b8: +** ptrue (p[0-7])\.b, all +** ld1w (z[0-9]+\.s), \1/z, \[x0\] +** add \2, \2, #1 +** st1w \2, \1, \[x1\] +** ret +*/ +void +test_s32_b8 (int32_t *x, int32_t *y) +{ + int32_t tmp1[8], tmp2[8]; + + svbool_t pg = svptrue_b8 (); + svst1 (pg, tmp1, svld1 (pg, x)); + svst1 (pg, tmp2, svadd_x (pg, svld1 (pg, tmp1), 1)); + svst1 (pg, y, svld1 (pg, tmp2)); +} + +/* +** test_s32_b32: +** ptrue (p[0-7])\.b, all +** ld1w (z[0-9]+\.s), \1/z, \[x0\] +** add \2, \2, #1 +** st1w \2, \1, \[x1\] +** ret +*/ +void +test_s32_b32 (int32_t *x, int32_t *y) +{ + int32_t tmp1[8], tmp2[8]; + + svbool_t pg = svptrue_b32 (); + svst1 (pg, tmp1, svld1 (pg, x)); + svst1 (pg, tmp2, svadd_x (pg, svld1 (pg, tmp1), 1)); + svst1 (pg, y, svld1 (pg, tmp2)); +} + +#ifdef __cplusplus +} +#endif -- 2.11.4.GIT