From 877e9e7026e52454e6f86fa2c6e52dae8e9894f8 Mon Sep 17 00:00:00 2001 From: rsandifo Date: Wed, 10 Jan 2018 13:07:54 +0000 Subject: [PATCH] Don't use permutes for single-element accesses (PR83753) After cunrolling the inner loop, the remaining loop in the testcase has a single 32-bit access and a group of 64-bit accesses. We first try to vectorise at 128 bits (VF 4), but decide not to for cost reasons. We then try with 64 bits (VF 2) instead. This means that the group of 64-bit accesses uses a single-element vector, which is deliberately supported as of r251538. We then try to create "permutes" for these single-element vectors and fall foul of: for (i = 0; i < 6; i++) sel[i] += exact_div (nelt, 2); in vect_grouped_store_supported, since nelt==1. Maybe we shouldn't even be trying to vectorise statements in the single-element case, and instead just copy the scalar statement for each member of the group. But until then, this patch treats non-strided grouped accesses as VMAT_CONTIGUOUS if no permutation is necessary. 2018-01-10 Richard Sandiford gcc/ PR tree-optimization/83753 * tree-vect-stmts.c (get_group_load_store_type): Use VMAT_CONTIGUOUS for non-strided grouped accesses if the number of elements is 1. gcc/testsuite/ PR tree-optimization/83753 * gcc.dg/torture/pr83753.c: New test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@256427 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 6 ++++++ gcc/testsuite/ChangeLog | 5 +++++ gcc/testsuite/gcc.dg/torture/pr83753.c | 19 +++++++++++++++++++ gcc/tree-vect-stmts.c | 14 ++++++++++---- 4 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr83753.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a30e2854555..f0e9e5948a2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2018-01-10 Richard Sandiford + + PR tree-optimization/83753 + * tree-vect-stmts.c (get_group_load_store_type): Use VMAT_CONTIGUOUS + for non-strided grouped accesses if the number of elements is 1. + 2018-01-10 Jan Hubicka PR target/81616 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 32a1460a35a..2fda0178a97 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2018-01-10 Richard Sandiford + + PR tree-optimization/83753 + * gcc.dg/torture/pr83753.c: New test. + 2018-01-09 Jan Hubicka * gcc.target/i386/avx2-gather-1.c: Add -march. diff --git a/gcc/testsuite/gcc.dg/torture/pr83753.c b/gcc/testsuite/gcc.dg/torture/pr83753.c new file mode 100644 index 00000000000..54aafc0b321 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr83753.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-mcpu=xgene1" { target aarch64*-*-* } } */ + +typedef struct { + int m1[10]; + double m2[10][8]; +} blah; + +void +foo (blah *info) { + int i, d; + + for (d=0; d<10; d++) { + info->m1[d] = 0; + info->m2[d][0] = 1; + for (i=1; i<8; i++) + info->m2[d][i] = 2; + } +} diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index a4f2d713959..819a981d57a 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1849,10 +1849,16 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp, && (can_overrun_p || !would_overrun_p) && compare_step_with_zero (stmt) > 0) { - /* First try using LOAD/STORE_LANES. */ - if (vls_type == VLS_LOAD - ? vect_load_lanes_supported (vectype, group_size) - : vect_store_lanes_supported (vectype, group_size)) + /* First cope with the degenerate case of a single-element + vector. */ + if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)) + *memory_access_type = VMAT_CONTIGUOUS; + + /* Otherwise try using LOAD/STORE_LANES. */ + if (*memory_access_type == VMAT_ELEMENTWISE + && (vls_type == VLS_LOAD + ? vect_load_lanes_supported (vectype, group_size) + : vect_store_lanes_supported (vectype, group_size))) { *memory_access_type = VMAT_LOAD_STORE_LANES; overrun_p = would_overrun_p; -- 2.11.4.GIT