RISC-V: Support highpart register overlap for widening vx/vf instructions
This patch applies the same approach that was used for vwcvt.
Before this patch (note the vector spill and reload marked below):
.L5:
add a3,s0,s1
add a4,s6,s1
add a5,s7,s1
vsetvli zero,s0,e32,m4,ta,ma
vle32.v v16,0(s1)
vle32.v v12,0(a3)
mv s1,s2
vle32.v v8,0(a4)
vle32.v v4,0(a5)
nop
vfwadd.vf v24,v16,fs0
vfwadd.vf v16,v12,fs0
vs8r.v v16,0(sp) -----> spill
vfwadd.vf v16,v8,fs0
vfwadd.vf v8,v4,fs0
nop
vsetvli zero,zero,e64,m8,ta,ma
vfmv.f.s fa4,v24
vl8re64.v v24,0(sp) -----> reload
vfmv.f.s fa5,v24
fcvt.lu.d a0,fa4,rtz
fcvt.lu.d a1,fa5,rtz
vfmv.f.s fa4,v16
vfmv.f.s fa5,v8
fcvt.lu.d a2,fa4,rtz
fcvt.lu.d a3,fa5,rtz
add s2,s2,s5
call sumation
add s3,s3,a0
bgeu s4,s2,.L5
After this patch (the spill and reload are gone):
.L5:
add a3,s0,s1
add a4,s6,s1
add a5,s7,s1
vsetvli zero,s0,e32,m4,ta,ma
vle32.v v4,0(s1)
vle32.v v28,0(a3)
mv s1,s2
vle32.v v20,0(a4)
vle32.v v12,0(a5)
vfwadd.vf v0,v4,fs0
vfwadd.vf v24,v28,fs0
vfwadd.vf v16,v20,fs0
vfwadd.vf v8,v12,fs0
vsetvli zero,zero,e64,m8,ta,ma
vfmv.f.s fa4,v0
vfmv.f.s fa5,v24
fcvt.lu.d a0,fa4,rtz
fcvt.lu.d a1,fa5,rtz
vfmv.f.s fa4,v16
vfmv.f.s fa5,v8
fcvt.lu.d a2,fa4,rtz
fcvt.lu.d a3,fa5,rtz
add s2,s2,s5
call sumation
add s3,s3,a0
bgeu s4,s2,.L5
PR target/112431
gcc/ChangeLog:
* config/riscv/vector.md: Support highpart overlap for vx/vf.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/pr112431-22.c: New test.
* gcc.target/riscv/rvv/base/pr112431-23.c: New test.
* gcc.target/riscv/rvv/base/pr112431-24.c: New test.
* gcc.target/riscv/rvv/base/pr112431-25.c: New test.
* gcc.target/riscv/rvv/base/pr112431-26.c: New test.
* gcc.target/riscv/rvv/base/pr112431-27.c: New test.