/*
 * ARM optimized DSP utils
 * Copyright (c) 2001 Lionel Ulmer
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 #include "libavcodec/dsputil.h"
27 void dsputil_init_iwmmxt(DSPContext
* c
, AVCodecContext
*avctx
);
28 void ff_float_init_arm_vfp(DSPContext
* c
, AVCodecContext
*avctx
);
29 void ff_dsputil_init_neon(DSPContext
*c
, AVCodecContext
*avctx
);
31 void j_rev_dct_ARM(DCTELEM
*data
);
32 void simple_idct_ARM(DCTELEM
*data
);
34 void simple_idct_armv5te(DCTELEM
*data
);
35 void simple_idct_put_armv5te(uint8_t *dest
, int line_size
, DCTELEM
*data
);
36 void simple_idct_add_armv5te(uint8_t *dest
, int line_size
, DCTELEM
*data
);
38 void ff_simple_idct_armv6(DCTELEM
*data
);
39 void ff_simple_idct_put_armv6(uint8_t *dest
, int line_size
, DCTELEM
*data
);
40 void ff_simple_idct_add_armv6(uint8_t *dest
, int line_size
, DCTELEM
*data
);
42 void ff_simple_idct_neon(DCTELEM
*data
);
43 void ff_simple_idct_put_neon(uint8_t *dest
, int line_size
, DCTELEM
*data
);
44 void ff_simple_idct_add_neon(uint8_t *dest
, int line_size
, DCTELEM
*data
);
47 static void (*ff_put_pixels_clamped
)(const DCTELEM
*block
, uint8_t *pixels
, int line_size
);
48 static void (*ff_add_pixels_clamped
)(const DCTELEM
*block
, uint8_t *pixels
, int line_size
);
/*
 * Pixel-copy primitives implemented outside the visible part of this file.
 * All share the (block, pixels, line_size, h) signature: `block` is written,
 * `pixels` is read-only.  The _x2/_y2/_xy2 suffixes presumably denote
 * horizontal, vertical and diagonal half-pel interpolation, and "no_rnd"
 * the non-rounding variants -- inferred from naming, TODO confirm.
 */
void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);

void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);

void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);

/* Installed as c->prefetch by dsputil_init_arm(). */
void ff_prefetch_arm(void *mem, int stride, int h);
63 CALL_2X_PIXELS(put_pixels16_x2_arm
, put_pixels8_x2_arm
, 8)
64 CALL_2X_PIXELS(put_pixels16_y2_arm
, put_pixels8_y2_arm
, 8)
65 CALL_2X_PIXELS(put_pixels16_xy2_arm
, put_pixels8_xy2_arm
, 8)
66 CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm
, put_no_rnd_pixels8_x2_arm
, 8)
67 CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm
, put_no_rnd_pixels8_y2_arm
, 8)
68 CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm
, put_no_rnd_pixels8_xy2_arm
, 8)
/*
 * Clamped add of a coefficient block onto dest; implemented outside the
 * visible part of this file.  The third parameter is restored from the call
 * site in simple_idct_ipp_add(), which passes (block, dest, line_size).
 */
void ff_add_pixels_clamped_ARM(short *block, unsigned char *dest,
                               int line_size);
/* XXX: those functions should be suppressed ASAP when all IDCTs are
   converted */
75 static void j_rev_dct_ARM_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
77 j_rev_dct_ARM (block
);
78 ff_put_pixels_clamped(block
, dest
, line_size
);
80 static void j_rev_dct_ARM_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
82 j_rev_dct_ARM (block
);
83 ff_add_pixels_clamped(block
, dest
, line_size
);
85 static void simple_idct_ARM_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
87 simple_idct_ARM (block
);
88 ff_put_pixels_clamped(block
, dest
, line_size
);
90 static void simple_idct_ARM_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
92 simple_idct_ARM (block
);
93 ff_add_pixels_clamped(block
, dest
, line_size
);
97 static void simple_idct_ipp(DCTELEM
*block
)
99 ippiDCT8x8Inv_Video_16s_C1I(block
);
101 static void simple_idct_ipp_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
103 ippiDCT8x8Inv_Video_16s8u_C1R(block
, dest
, line_size
);
106 void add_pixels_clamped_iwmmxt(const DCTELEM
*block
, uint8_t *pixels
, int line_size
);
108 static void simple_idct_ipp_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
110 ippiDCT8x8Inv_Video_16s_C1I(block
);
112 add_pixels_clamped_iwmmxt(block
, dest
, line_size
);
114 ff_add_pixels_clamped_ARM(block
, dest
, line_size
);
121 return HAVE_IWMMXT
* FF_MM_IWMMXT
;
/**
 * Install the ARM implementations into a DSPContext.
 *
 * In the order shown below it: copies avctx->idct_algo; saves the context's
 * put_pixels_clamped / add_pixels_clamped hooks in the file-static pointers
 * used by the *_put / *_add wrappers; chooses the IDCT entry points and
 * permutation type; fills put_pixels_tab and put_no_rnd_pixels_tab; sets
 * the prefetch hook; and finally calls the iwMMXt, VFP and NEON initialisers.
 *
 * NOTE(review): brace-only lines and, presumably, the preprocessor
 * conditionals guarding the architecture-specific parts are not visible in
 * this view; as shown, every statement below is unconditional.  Confirm the
 * real guards against the complete file before relying on this listing.
 *
 * @param c     context whose function pointers are filled in; its
 *              put_pixels_clamped and add_pixels_clamped members are read
 * @param avctx supplies idct_algo and lowres; also forwarded to the
 *              sub-initialisers
 */
124 void dsputil_init_arm(DSPContext
* c
, AVCodecContext
*avctx
)
/* Work on a local copy so the automatic choice below does not modify avctx. */
126 int idct_algo
= avctx
->idct_algo
;
/* Remember the context's current clamped store/add hooks for the wrappers
 * defined earlier in this file. */
128 ff_put_pixels_clamped
= c
->put_pixels_clamped
;
129 ff_add_pixels_clamped
= c
->add_pixels_clamped
;
/* IDCT entry points are only selected when lowres is 0. */
131 if (avctx
->lowres
== 0) {
132 if(idct_algo
== FF_IDCT_AUTO
){
/* NOTE(review): five successive assignments; presumably each sits in its own
 * mutually exclusive preprocessor branch that is not visible here.  Read
 * literally, the last one (FF_IDCT_ARM) wins. */
134 idct_algo
= FF_IDCT_IPP
;
136 idct_algo
= FF_IDCT_SIMPLENEON
;
138 idct_algo
= FF_IDCT_SIMPLEARMV6
;
140 idct_algo
= FF_IDCT_SIMPLEARMV5TE
;
142 idct_algo
= FF_IDCT_ARM
;
/* Each branch below sets the same four members: idct_put, idct_add, idct and
 * idct_permutation_type. */
146 if(idct_algo
==FF_IDCT_ARM
){
147 c
->idct_put
= j_rev_dct_ARM_put
;
148 c
->idct_add
= j_rev_dct_ARM_add
;
149 c
->idct
= j_rev_dct_ARM
;
150 c
->idct_permutation_type
= FF_LIBMPEG2_IDCT_PERM
;
151 } else if (idct_algo
==FF_IDCT_SIMPLEARM
){
152 c
->idct_put
= simple_idct_ARM_put
;
153 c
->idct_add
= simple_idct_ARM_add
;
154 c
->idct
= simple_idct_ARM
;
155 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
157 } else if (idct_algo
==FF_IDCT_SIMPLEARMV6
){
158 c
->idct_put
= ff_simple_idct_put_armv6
;
159 c
->idct_add
= ff_simple_idct_add_armv6
;
160 c
->idct
= ff_simple_idct_armv6
;
161 c
->idct_permutation_type
= FF_LIBMPEG2_IDCT_PERM
;
164 } else if (idct_algo
==FF_IDCT_SIMPLEARMV5TE
){
165 c
->idct_put
= simple_idct_put_armv5te
;
166 c
->idct_add
= simple_idct_add_armv5te
;
167 c
->idct
= simple_idct_armv5te
;
168 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
171 } else if (idct_algo
==FF_IDCT_IPP
){
172 c
->idct_put
= simple_idct_ipp_put
;
173 c
->idct_add
= simple_idct_ipp_add
;
174 c
->idct
= simple_idct_ipp
;
175 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
178 } else if (idct_algo
==FF_IDCT_SIMPLENEON
){
179 c
->idct_put
= ff_simple_idct_put_neon
;
180 c
->idct_add
= ff_simple_idct_add_neon
;
181 c
->idct
= ff_simple_idct_neon
;
182 c
->idct_permutation_type
= FF_PARTTRANS_IDCT_PERM
;
/* Pixel-copy tables: row 0 receives the 16-wide functions, row 1 the 8-wide
 * ones; columns 0..3 receive the plain, _x2, _y2 and _xy2 variants. */
187 c
->put_pixels_tab
[0][0] = put_pixels16_arm
;
188 c
->put_pixels_tab
[0][1] = put_pixels16_x2_arm
;
189 c
->put_pixels_tab
[0][2] = put_pixels16_y2_arm
;
190 c
->put_pixels_tab
[0][3] = put_pixels16_xy2_arm
;
/* The no_rnd table reuses the plain copy in column 0; only columns 1..3 get
 * dedicated no_rnd functions. */
191 c
->put_no_rnd_pixels_tab
[0][0] = put_pixels16_arm
;
192 c
->put_no_rnd_pixels_tab
[0][1] = put_no_rnd_pixels16_x2_arm
;
193 c
->put_no_rnd_pixels_tab
[0][2] = put_no_rnd_pixels16_y2_arm
;
194 c
->put_no_rnd_pixels_tab
[0][3] = put_no_rnd_pixels16_xy2_arm
;
195 c
->put_pixels_tab
[1][0] = put_pixels8_arm
;
196 c
->put_pixels_tab
[1][1] = put_pixels8_x2_arm
;
197 c
->put_pixels_tab
[1][2] = put_pixels8_y2_arm
;
198 c
->put_pixels_tab
[1][3] = put_pixels8_xy2_arm
;
199 c
->put_no_rnd_pixels_tab
[1][0] = put_pixels8_arm
;
200 c
->put_no_rnd_pixels_tab
[1][1] = put_no_rnd_pixels8_x2_arm
;
201 c
->put_no_rnd_pixels_tab
[1][2] = put_no_rnd_pixels8_y2_arm
;
202 c
->put_no_rnd_pixels_tab
[1][3] = put_no_rnd_pixels8_xy2_arm
;
/* Prefetch hook. */
205 c
->prefetch
= ff_prefetch_arm
;
/* Hand over to the instruction-set specific initialisers; they run last, so
 * they see everything assigned above. */
209 dsputil_init_iwmmxt(c
, avctx
);
212 ff_float_init_arm_vfp(c
, avctx
);
215 ff_dsputil_init_neon(c
, avctx
);