RISC-V: Support highpart overlap for indexed load with SRC EEW < DEST EEW
Leverage the approach of the previous highpart-overlap patches for this PR.
Before this patch (no destination group overlaps its index register, and one result is spilled and reloaded):
.L5:
add a3,s0,s2
add a4,s6,s2
add a5,s7,s2
vsetvli zero,s0,e64,m8,ta,ma
vle8.v v4,0(s2)
vle8.v v3,0(a3)
mv s2,s1
vle8.v v2,0(a4)
vle8.v v1,0(a5)
nop
vluxei8.v v8,(s1),v4
vs8r.v v8,0(sp) ---> spill
vluxei8.v v8,(s1),v3
vluxei8.v v16,(s1),v2
vluxei8.v v24,(s1),v1
nop
vmv.x.s a1,v8
vl8re64.v v8,0(sp) ---> reload
vmv.x.s a3,v24
vmv.x.s a2,v16
vmv.x.s a0,v8
add s1,s1,s5
call sumation
add s3,s3,a0
bgeu s4,s1,.L5
After this patch (each index register is the highest-numbered register of its destination group; no spill):
.L5:
add a3,s0,s2
add a4,s6,s2
add a5,s7,s2
vsetvli zero,s0,e64,m8,ta,ma
vle8.v v15,0(s2)
vle8.v v23,0(a3)
mv s2,s1
vle8.v v31,0(a4)
vle8.v v7,0(a5)
vluxei8.v v8,(s1),v15
vluxei8.v v16,(s1),v23
vluxei8.v v24,(s1),v31
vluxei8.v v0,(s1),v7
vmv.x.s a3,v0
vmv.x.s a2,v24
vmv.x.s a1,v16
vmv.x.s a0,v8
add s1,s1,s5
call sumation
add s3,s3,a0
bgeu s4,s1,.L5
PR target/112431
gcc/ChangeLog:
* config/riscv/vector.md: Support highpart overlap for indexed load.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/pr112431-28.c: New test.
* gcc.target/riscv/rvv/base/pr112431-29.c: New test.
* gcc.target/riscv/rvv/base/pr112431-30.c: New test.
* gcc.target/riscv/rvv/base/pr112431-31.c: New test.
* gcc.target/riscv/rvv/base/pr112431-32.c: New test.
* gcc.target/riscv/rvv/base/pr112431-33.c: New test.