RISC-V: Support highpart register overlap for widen vx/vf instructions
commit a23415d7572774701d7ec04664390260ab9a3f63
author Juzhe-Zhong <juzhe.zhong@rivai.ai>
Fri, 1 Dec 2023 07:00:27 +0000 (1 15:00 +0800)
committer Lehua Ding <lehua.ding@rivai.ai>
Fri, 1 Dec 2023 12:07:10 +0000 (1 20:07 +0800)
tree ab49cacf09ad84cb4463c618174c2947afd49871
parent 4418d55bcd1b7e0ef823981b6a781d7de5c38cce
RISC-V: Support highpart register overlap for widen vx/vf instructions

This patch leverages the same approach as vwcvt.

Before this patch:

.L5:
        add     a3,s0,s1
        add     a4,s6,s1
        add     a5,s7,s1
        vsetvli zero,s0,e32,m4,ta,ma
        vle32.v v16,0(s1)
        vle32.v v12,0(a3)
        mv      s1,s2
        vle32.v v8,0(a4)
        vle32.v v4,0(a5)
        nop
        vfwadd.vf       v24,v16,fs0
        vfwadd.vf       v16,v12,fs0
        vs8r.v  v16,0(sp)                -----> spill
        vfwadd.vf       v16,v8,fs0
        vfwadd.vf       v8,v4,fs0
        nop
        vsetvli zero,zero,e64,m8,ta,ma
        vfmv.f.s        fa4,v24
        vl8re64.v       v24,0(sp)       -----> reload
        vfmv.f.s        fa5,v24
        fcvt.lu.d a0,fa4,rtz
        fcvt.lu.d a1,fa5,rtz
        vfmv.f.s        fa4,v16
        vfmv.f.s        fa5,v8
        fcvt.lu.d a2,fa4,rtz
        fcvt.lu.d a3,fa5,rtz
        add     s2,s2,s5
        call    sumation
        add     s3,s3,a0
        bgeu    s4,s2,.L5

After this patch:

.L5:
        add     a3,s0,s1
        add     a4,s6,s1
        add     a5,s7,s1
        vsetvli zero,s0,e32,m4,ta,ma
        vle32.v v4,0(s1)
        vle32.v v28,0(a3)
        mv      s1,s2
        vle32.v v20,0(a4)
        vle32.v v12,0(a5)
        vfwadd.vf       v0,v4,fs0
        vfwadd.vf       v24,v28,fs0
        vfwadd.vf       v16,v20,fs0
        vfwadd.vf       v8,v12,fs0
        vsetvli zero,zero,e64,m8,ta,ma
        vfmv.f.s        fa4,v0
        vfmv.f.s        fa5,v24
        fcvt.lu.d a0,fa4,rtz
        fcvt.lu.d a1,fa5,rtz
        vfmv.f.s        fa4,v16
        vfmv.f.s        fa5,v8
        fcvt.lu.d a2,fa4,rtz
        fcvt.lu.d a3,fa5,rtz
        add     s2,s2,s5
        call    sumation
        add     s3,s3,a0
        bgeu    s4,s2,.L5

PR target/112431

gcc/ChangeLog:

* config/riscv/vector.md: Support highpart overlap for vx/vf.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-22.c: New test.
* gcc.target/riscv/rvv/base/pr112431-23.c: New test.
* gcc.target/riscv/rvv/base/pr112431-24.c: New test.
* gcc.target/riscv/rvv/base/pr112431-25.c: New test.
* gcc.target/riscv/rvv/base/pr112431-26.c: New test.
* gcc.target/riscv/rvv/base/pr112431-27.c: New test.
gcc/config/riscv/vector.md
gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-23.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-24.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-25.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-26.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-27.c [new file with mode: 0644]