/*
 * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23 #include "libavutil/attributes.h"
24 #include "libavutil/cpu.h"
25 #include "libavutil/riscv/cpu.h"
26 #include "libavcodec/vp8dsp.h"
/*
 * Prototypes of routines whose bodies are not part of this file; they are
 * only referenced here when filling in a VP8DSPContext.
 * NOTE(review): the _rvv suffix presumably marks RISC-V Vector assembly
 * implementations - confirm against the corresponding assembly source.
 */
void ff_vp8_luma_dc_wht_rvv(int16_t block[4][4][16], int16_t dc[16]);
void ff_vp8_idct_dc_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
void ff_vp8_idct_dc_add4y_rvv(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
void ff_vp8_idct_dc_add4uv_rvv(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
45 av_cold
void ff_vp78dsp_init_riscv(VP8DSPContext
*c
)
48 int flags
= av_get_cpu_flags();
49 if (flags
& AV_CPU_FLAG_RV_MISALIGNED
) {
50 #if __riscv_xlen >= 64
51 c
->put_vp8_epel_pixels_tab
[0][0][0] = ff_put_vp8_pixels16_rvi
;
52 c
->put_vp8_epel_pixels_tab
[1][0][0] = ff_put_vp8_pixels8_rvi
;
53 c
->put_vp8_bilinear_pixels_tab
[0][0][0] = ff_put_vp8_pixels16_rvi
;
54 c
->put_vp8_bilinear_pixels_tab
[1][0][0] = ff_put_vp8_pixels8_rvi
;
56 c
->put_vp8_epel_pixels_tab
[2][0][0] = ff_put_vp8_pixels4_rvi
;
57 c
->put_vp8_bilinear_pixels_tab
[2][0][0] = ff_put_vp8_pixels4_rvi
;
60 if (flags
& AV_CPU_FLAG_RVV_I32
&& ff_rv_vlen_least(128)) {
61 c
->put_vp8_bilinear_pixels_tab
[0][0][1] = ff_put_vp8_bilin16_h_rvv
;
62 c
->put_vp8_bilinear_pixels_tab
[0][0][2] = ff_put_vp8_bilin16_h_rvv
;
63 c
->put_vp8_bilinear_pixels_tab
[1][0][1] = ff_put_vp8_bilin8_h_rvv
;
64 c
->put_vp8_bilinear_pixels_tab
[1][0][2] = ff_put_vp8_bilin8_h_rvv
;
65 c
->put_vp8_bilinear_pixels_tab
[2][0][1] = ff_put_vp8_bilin4_h_rvv
;
66 c
->put_vp8_bilinear_pixels_tab
[2][0][2] = ff_put_vp8_bilin4_h_rvv
;
68 c
->put_vp8_bilinear_pixels_tab
[0][1][0] = ff_put_vp8_bilin16_v_rvv
;
69 c
->put_vp8_bilinear_pixels_tab
[0][2][0] = ff_put_vp8_bilin16_v_rvv
;
70 c
->put_vp8_bilinear_pixels_tab
[1][1][0] = ff_put_vp8_bilin8_v_rvv
;
71 c
->put_vp8_bilinear_pixels_tab
[1][2][0] = ff_put_vp8_bilin8_v_rvv
;
72 c
->put_vp8_bilinear_pixels_tab
[2][1][0] = ff_put_vp8_bilin4_v_rvv
;
73 c
->put_vp8_bilinear_pixels_tab
[2][2][0] = ff_put_vp8_bilin4_v_rvv
;
75 c
->put_vp8_bilinear_pixels_tab
[0][1][1] = ff_put_vp8_bilin16_hv_rvv
;
76 c
->put_vp8_bilinear_pixels_tab
[0][1][2] = ff_put_vp8_bilin16_hv_rvv
;
77 c
->put_vp8_bilinear_pixels_tab
[0][2][1] = ff_put_vp8_bilin16_hv_rvv
;
78 c
->put_vp8_bilinear_pixels_tab
[0][2][2] = ff_put_vp8_bilin16_hv_rvv
;
79 c
->put_vp8_bilinear_pixels_tab
[1][1][1] = ff_put_vp8_bilin8_hv_rvv
;
80 c
->put_vp8_bilinear_pixels_tab
[1][1][2] = ff_put_vp8_bilin8_hv_rvv
;
81 c
->put_vp8_bilinear_pixels_tab
[1][2][1] = ff_put_vp8_bilin8_hv_rvv
;
82 c
->put_vp8_bilinear_pixels_tab
[1][2][2] = ff_put_vp8_bilin8_hv_rvv
;
83 c
->put_vp8_bilinear_pixels_tab
[2][1][1] = ff_put_vp8_bilin4_hv_rvv
;
84 c
->put_vp8_bilinear_pixels_tab
[2][1][2] = ff_put_vp8_bilin4_hv_rvv
;
85 c
->put_vp8_bilinear_pixels_tab
[2][2][1] = ff_put_vp8_bilin4_hv_rvv
;
86 c
->put_vp8_bilinear_pixels_tab
[2][2][2] = ff_put_vp8_bilin4_hv_rvv
;
88 if (flags
& AV_CPU_FLAG_RVB_ADDR
) {
89 c
->put_vp8_epel_pixels_tab
[0][0][2] = ff_put_vp8_epel16_h6_rvv
;
90 c
->put_vp8_epel_pixels_tab
[1][0][2] = ff_put_vp8_epel8_h6_rvv
;
91 c
->put_vp8_epel_pixels_tab
[2][0][2] = ff_put_vp8_epel4_h6_rvv
;
92 c
->put_vp8_epel_pixels_tab
[0][0][1] = ff_put_vp8_epel16_h4_rvv
;
93 c
->put_vp8_epel_pixels_tab
[1][0][1] = ff_put_vp8_epel8_h4_rvv
;
94 c
->put_vp8_epel_pixels_tab
[2][0][1] = ff_put_vp8_epel4_h4_rvv
;
96 c
->put_vp8_epel_pixels_tab
[0][2][0] = ff_put_vp8_epel16_v6_rvv
;
97 c
->put_vp8_epel_pixels_tab
[1][2][0] = ff_put_vp8_epel8_v6_rvv
;
98 c
->put_vp8_epel_pixels_tab
[2][2][0] = ff_put_vp8_epel4_v6_rvv
;
99 c
->put_vp8_epel_pixels_tab
[0][1][0] = ff_put_vp8_epel16_v4_rvv
;
100 c
->put_vp8_epel_pixels_tab
[1][1][0] = ff_put_vp8_epel8_v4_rvv
;
101 c
->put_vp8_epel_pixels_tab
[2][1][0] = ff_put_vp8_epel4_v4_rvv
;
102 #if __riscv_xlen <= 64
103 c
->put_vp8_epel_pixels_tab
[0][2][2] = ff_put_vp8_epel16_h6v6_rvv
;
104 c
->put_vp8_epel_pixels_tab
[1][2][2] = ff_put_vp8_epel8_h6v6_rvv
;
105 c
->put_vp8_epel_pixels_tab
[2][2][2] = ff_put_vp8_epel4_h6v6_rvv
;
106 c
->put_vp8_epel_pixels_tab
[0][2][1] = ff_put_vp8_epel16_h4v6_rvv
;
107 c
->put_vp8_epel_pixels_tab
[1][2][1] = ff_put_vp8_epel8_h4v6_rvv
;
108 c
->put_vp8_epel_pixels_tab
[2][2][1] = ff_put_vp8_epel4_h4v6_rvv
;
109 c
->put_vp8_epel_pixels_tab
[0][1][1] = ff_put_vp8_epel16_h4v4_rvv
;
110 c
->put_vp8_epel_pixels_tab
[1][1][1] = ff_put_vp8_epel8_h4v4_rvv
;
111 c
->put_vp8_epel_pixels_tab
[2][1][1] = ff_put_vp8_epel4_h4v4_rvv
;
112 c
->put_vp8_epel_pixels_tab
[0][1][2] = ff_put_vp8_epel16_h6v4_rvv
;
113 c
->put_vp8_epel_pixels_tab
[1][1][2] = ff_put_vp8_epel8_h6v4_rvv
;
114 c
->put_vp8_epel_pixels_tab
[2][1][2] = ff_put_vp8_epel4_h6v4_rvv
;
122 av_cold
void ff_vp8dsp_init_riscv(VP8DSPContext
*c
)
125 int flags
= av_get_cpu_flags();
127 if (flags
& AV_CPU_FLAG_RVV_I32
&& ff_rv_vlen_least(128)) {
128 #if __riscv_xlen >= 64
129 if (flags
& AV_CPU_FLAG_RVV_I64
)
130 c
->vp8_luma_dc_wht
= ff_vp8_luma_dc_wht_rvv
;
132 c
->vp8_idct_dc_add
= ff_vp8_idct_dc_add_rvv
;
133 c
->vp8_idct_dc_add4y
= ff_vp8_idct_dc_add4y_rvv
;
134 if (flags
& AV_CPU_FLAG_RVB_ADDR
) {
135 c
->vp8_idct_dc_add4uv
= ff_vp8_idct_dc_add4uv_rvv
;