2 * DSP functions for Indeo Video Interactive codecs (Indeo4 and Indeo5)
4 * Copyright (c) 2009-2011 Maxim Poliakovski
6 * This file is part of Libav.
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * DSP functions (inverse transforms, motion compensation, wavelet recompositions)
26 * for Indeo Video Interactive codecs.
32 #include "ivi_common.h"
35 void ff_ivi_recompose53(const IVIPlaneDesc
*plane
, uint8_t *dst
,
39 int32_t p0
, p1
, p2
, p3
, tmp0
, tmp1
, tmp2
;
40 int32_t b0_1
, b0_2
, b1_1
, b1_2
, b1_3
, b2_1
, b2_2
, b2_3
, b2_4
, b2_5
, b2_6
;
41 int32_t b3_1
, b3_2
, b3_3
, b3_4
, b3_5
, b3_6
, b3_7
, b3_8
, b3_9
;
42 int32_t pitch
, back_pitch
;
43 const IDWTELEM
*b0_ptr
, *b1_ptr
, *b2_ptr
, *b3_ptr
;
44 const int num_bands
= 4;
46 /* all bands should have the same pitch */
47 pitch
= plane
->bands
[0].pitch
;
49 /* pixels at the position "y-1" will be set to pixels at the "y" for the 1st iteration */
52 /* get pointers to the wavelet bands */
53 b0_ptr
= plane
->bands
[0].buf
;
54 b1_ptr
= plane
->bands
[1].buf
;
55 b2_ptr
= plane
->bands
[2].buf
;
56 b3_ptr
= plane
->bands
[3].buf
;
58 for (y
= 0; y
< plane
->height
; y
+= 2) {
59 /* load storage variables with values */
66 b1_1
= b1_ptr
[back_pitch
];
68 b1_3
= b1_1
- b1_2
*6 + b1_ptr
[pitch
];
72 b2_2
= b2_ptr
[0]; // b2[x, y ]
73 b2_3
= b2_2
; // b2[x+1,y ] = b2[x,y]
74 b2_5
= b2_ptr
[pitch
]; // b2[x ,y+1]
75 b2_6
= b2_5
; // b2[x+1,y+1] = b2[x,y+1]
79 b3_2
= b3_ptr
[back_pitch
]; // b3[x ,y-1]
80 b3_3
= b3_2
; // b3[x+1,y-1] = b3[x ,y-1]
81 b3_5
= b3_ptr
[0]; // b3[x ,y ]
82 b3_6
= b3_5
; // b3[x+1,y ] = b3[x ,y ]
83 b3_8
= b3_2
- b3_5
*6 + b3_ptr
[pitch
];
87 for (x
= 0, indx
= 0; x
< plane
->width
; x
+=2, indx
++) {
88 /* some values calculated in the previous iterations can */
89 /* be reused in the next ones, so do appropriate copying */
90 b2_1
= b2_2
; // b2[x-1,y ] = b2[x, y ]
91 b2_2
= b2_3
; // b2[x ,y ] = b2[x+1,y ]
92 b2_4
= b2_5
; // b2[x-1,y+1] = b2[x ,y+1]
93 b2_5
= b2_6
; // b2[x ,y+1] = b2[x+1,y+1]
94 b3_1
= b3_2
; // b3[x-1,y-1] = b3[x ,y-1]
95 b3_2
= b3_3
; // b3[x ,y-1] = b3[x+1,y-1]
96 b3_4
= b3_5
; // b3[x-1,y ] = b3[x ,y ]
97 b3_5
= b3_6
; // b3[x ,y ] = b3[x+1,y ]
98 b3_7
= b3_8
; // vert_HPF(x-1)
99 b3_8
= b3_9
; // vert_HPF(x )
101 p0
= p1
= p2
= p3
= 0;
103 /* process the LL-band by applying LPF both vertically and horizontally */
107 b0_1
= b0_ptr
[indx
+1];
108 b0_2
= b0_ptr
[pitch
+indx
+1];
113 p2
= (tmp0
+ tmp2
) << 3;
114 p3
= (tmp1
+ tmp2
+ b0_2
) << 2;
117 /* process the HL-band by applying HPF vertically and LPF horizontally */
121 b1_2
= b1_ptr
[indx
+1];
122 b1_1
= b1_ptr
[back_pitch
+indx
+1];
124 tmp2
= tmp1
- tmp0
*6 + b1_3
;
125 b1_3
= b1_1
- b1_2
*6 + b1_ptr
[pitch
+indx
+1];
127 p0
+= (tmp0
+ tmp1
) << 3;
128 p1
+= (tmp0
+ tmp1
+ b1_1
+ b1_2
) << 2;
130 p3
+= (tmp2
+ b1_3
) << 1;
133 /* process the LH-band by applying LPF vertically and HPF horizontally */
135 b2_3
= b2_ptr
[indx
+1];
136 b2_6
= b2_ptr
[pitch
+indx
+1];
139 tmp1
= b2_1
- b2_2
*6 + b2_3
;
143 p2
+= (tmp0
+ b2_4
+ b2_5
) << 2;
144 p3
+= (tmp1
+ b2_4
- b2_5
*6 + b2_6
) << 1;
147 /* process the HH-band by applying HPF both vertically and horizontally */
149 b3_6
= b3_ptr
[indx
+1]; // b3[x+1,y ]
150 b3_3
= b3_ptr
[back_pitch
+indx
+1]; // b3[x+1,y-1]
156 b3_9
= b3_3
- b3_6
*6 + b3_ptr
[pitch
+indx
+1];
158 p0
+= (tmp0
+ tmp1
) << 2;
159 p1
+= (tmp0
- tmp1
*6 + tmp2
) << 1;
160 p2
+= (b3_7
+ b3_8
) << 1;
161 p3
+= b3_7
- b3_8
*6 + b3_9
;
164 /* output four pixels */
165 dst
[x
] = av_clip_uint8((p0
>> 6) + 128);
166 dst
[x
+1] = av_clip_uint8((p1
>> 6) + 128);
167 dst
[dst_pitch
+x
] = av_clip_uint8((p2
>> 6) + 128);
168 dst
[dst_pitch
+x
+1] = av_clip_uint8((p3
>> 6) + 128);
171 dst
+= dst_pitch
<< 1;
182 void ff_ivi_recompose_haar(const IVIPlaneDesc
*plane
, uint8_t *dst
,
185 int x
, y
, indx
, b0
, b1
, b2
, b3
, p0
, p1
, p2
, p3
;
186 const IDWTELEM
*b0_ptr
, *b1_ptr
, *b2_ptr
, *b3_ptr
;
189 /* all bands should have the same pitch */
190 pitch
= plane
->bands
[0].pitch
;
192 /* get pointers to the wavelet bands */
193 b0_ptr
= plane
->bands
[0].buf
;
194 b1_ptr
= plane
->bands
[1].buf
;
195 b2_ptr
= plane
->bands
[2].buf
;
196 b3_ptr
= plane
->bands
[3].buf
;
198 for (y
= 0; y
< plane
->height
; y
+= 2) {
199 for (x
= 0, indx
= 0; x
< plane
->width
; x
+= 2, indx
++) {
200 /* load coefficients */
201 b0
= b0_ptr
[indx
]; //should be: b0 = (num_bands > 0) ? b0_ptr[indx] : 0;
202 b1
= b1_ptr
[indx
]; //should be: b1 = (num_bands > 1) ? b1_ptr[indx] : 0;
203 b2
= b2_ptr
[indx
]; //should be: b2 = (num_bands > 2) ? b2_ptr[indx] : 0;
204 b3
= b3_ptr
[indx
]; //should be: b3 = (num_bands > 3) ? b3_ptr[indx] : 0;
206 /* haar wavelet recomposition */
207 p0
= (b0
+ b1
+ b2
+ b3
+ 2) >> 2;
208 p1
= (b0
+ b1
- b2
- b3
+ 2) >> 2;
209 p2
= (b0
- b1
+ b2
- b3
+ 2) >> 2;
210 p3
= (b0
- b1
- b2
+ b3
+ 2) >> 2;
212 /* bias, convert and output four pixels */
213 dst
[x
] = av_clip_uint8(p0
+ 128);
214 dst
[x
+ 1] = av_clip_uint8(p1
+ 128);
215 dst
[dst_pitch
+ x
] = av_clip_uint8(p2
+ 128);
216 dst
[dst_pitch
+ x
+ 1] = av_clip_uint8(p3
+ 128);
219 dst
+= dst_pitch
<< 1;
228 /** butterfly operation for the inverse Haar transform */
229 #define IVI_HAAR_BFLY(s1, s2, o1, o2, t) \
231 o1 = (s1 + s2) >> 1;\
/**
 * Inverse 8-point Haar transform.
 *
 * The eight inputs are taken in the order s1, s5, s3, s7, s2, s4, s6, s8.
 * The eight results are stored to d1..d8 after being passed through
 * COMPENSATE(), which must be defined by the code expanding this macro.
 * t0..t8 are caller-supplied integer scratch lvalues; t0 serves as the
 * temporary of every IVI_HAAR_BFLY() step.
 *
 * s1 and s5 are doubled before the first butterfly. The doubling is written
 * as a multiplication rather than "<< 1" because left-shifting a negative
 * value is undefined behaviour in C.
 */
#define INV_HAAR8(s1, s5, s3, s7, s2, s4, s6, s8,\
                  d1, d2, d3, d4, d5, d6, d7, d8,\
                  t0, t1, t2, t3, t4, t5, t6, t7, t8) {\
    t1 = (s1) * 2; t5 = (s5) * 2;\
    IVI_HAAR_BFLY(t1, t5, t1, t5, t0); IVI_HAAR_BFLY(t1, s3, t1, t3, t0);\
    IVI_HAAR_BFLY(t5, s7, t5, t7, t0); IVI_HAAR_BFLY(t1, s2, t1, t2, t0);\
    IVI_HAAR_BFLY(t3, s4, t3, t4, t0); IVI_HAAR_BFLY(t5, s6, t5, t6, t0);\
    IVI_HAAR_BFLY(t7, s8, t7, t8, t0);\
    d1 = COMPENSATE(t1);\
    d2 = COMPENSATE(t2);\
    d3 = COMPENSATE(t3);\
    d4 = COMPENSATE(t4);\
    d5 = COMPENSATE(t5);\
    d6 = COMPENSATE(t6);\
    d7 = COMPENSATE(t7);\
    d8 = COMPENSATE(t8); }
/**
 * Inverse 4-point Haar transform, operating on its arguments in place.
 *
 * s1, s3, s5 and s7 must be modifiable lvalues: after the three butterfly
 * steps each one is overwritten with COMPENSATE() of its own value.
 * COMPENSATE() must be defined by the code expanding this macro.
 *
 * NOTE(review): this macro calls a two-argument HAAR_BFLY() helper, while the
 * only butterfly macro visible in this file is the five-argument
 * IVI_HAAR_BFLY(). Confirm that HAAR_BFLY() is provided elsewhere before
 * using INV_HAAR4; presumably it updates both of its arguments in place.
 */
#define INV_HAAR4(s1, s3, s5, s7) {\
    HAAR_BFLY(s1, s5); HAAR_BFLY(s1, s3); HAAR_BFLY(s5, s7);\
    s1 = COMPENSATE(s1);\
    s3 = COMPENSATE(s3);\
    s5 = COMPENSATE(s5);\
    s7 = COMPENSATE(s7); }
260 void ff_ivi_inverse_haar_8x8(const int32_t *in
, int16_t *out
, uint32_t pitch
,
261 const uint8_t *flags
)
263 int i
, shift
, sp1
, sp2
, sp3
, sp4
;
267 int t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
269 /* apply the InvHaar8 to all columns */
270 #define COMPENSATE(x) (x)
273 for (i
= 0; i
< 8; i
++) {
277 sp1
= src
[ 0] << shift
;
278 sp2
= src
[ 8] << shift
;
279 sp3
= src
[16] << shift
;
280 sp4
= src
[24] << shift
;
281 INV_HAAR8( sp1
, sp2
, sp3
, sp4
,
282 src
[32], src
[40], src
[48], src
[56],
283 dst
[ 0], dst
[ 8], dst
[16], dst
[24],
284 dst
[32], dst
[40], dst
[48], dst
[56],
285 t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
);
287 dst
[ 0] = dst
[ 8] = dst
[16] = dst
[24] =
288 dst
[32] = dst
[40] = dst
[48] = dst
[56] = 0;
295 /* apply the InvHaar8 to all rows */
296 #define COMPENSATE(x) (x)
298 for (i
= 0; i
< 8; i
++) {
299 if ( !src
[0] && !src
[1] && !src
[2] && !src
[3]
300 && !src
[4] && !src
[5] && !src
[6] && !src
[7]) {
301 memset(out
, 0, 8 * sizeof(out
[0]));
303 INV_HAAR8(src
[0], src
[1], src
[2], src
[3],
304 src
[4], src
[5], src
[6], src
[7],
305 out
[0], out
[1], out
[2], out
[3],
306 out
[4], out
[5], out
[6], out
[7],
307 t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
);
315 void ff_ivi_dc_haar_2d(const int32_t *in
, int16_t *out
, uint32_t pitch
,
321 dc_coeff
= (*in
+ 0) >> 3;
323 for (y
= 0; y
< blk_size
; out
+= pitch
, y
++) {
324 for (x
= 0; x
< blk_size
; x
++)
329 /** butterfly operation for the inverse slant transform */
330 #define IVI_SLANT_BFLY(s1, s2, o1, o2, t) \
335 /** This is a reflection a,b = 1/2, 5/4 for the inverse slant transform */
336 #define IVI_IREFLECT(s1, s2, o1, o2, t) \
337 t = ((s1 + s2*2 + 2) >> 2) + s1;\
338 o2 = ((s1*2 - s2 + 2) >> 2) - s2;\
341 /** This is a reflection a,b = 1/2, 7/8 for the inverse slant transform */
342 #define IVI_SLANT_PART4(s1, s2, o1, o2, t) \
343 t = s2 + ((s1*4 - s2 + 4) >> 3);\
344 o2 = s1 + ((-s1 - s2*4 + 4) >> 3);\
/**
 * Inverse 8-point slant transform.
 *
 * The eight inputs are taken in the order s1, s4, s8, s5, s2, s6, s3, s7.
 * The eight results are stored to d1..d8 after being passed through
 * COMPENSATE(), which must be defined by the code expanding this macro.
 * t0..t8 are caller-supplied integer scratch lvalues; t0 is handed to every
 * helper step as its temporary.
 *
 * The transform is built from one IVI_SLANT_PART4() step on (s4, s5),
 * followed by stages of IVI_SLANT_BFLY() butterflies and two
 * IVI_IREFLECT() reflections.
 */
#define IVI_INV_SLANT8(s1, s4, s8, s5, s2, s6, s3, s7,\
                       d1, d2, d3, d4, d5, d6, d7, d8,\
                       t0, t1, t2, t3, t4, t5, t6, t7, t8) {\
    IVI_SLANT_PART4(s4, s5, t4, t5, t0);\
\
    IVI_SLANT_BFLY(s1, t5, t1, t5, t0); IVI_SLANT_BFLY(s2, s6, t2, t6, t0);\
    IVI_SLANT_BFLY(s7, s3, t7, t3, t0); IVI_SLANT_BFLY(t4, s8, t4, t8, t0);\
\
    IVI_SLANT_BFLY(t1, t2, t1, t2, t0); IVI_IREFLECT  (t4, t3, t4, t3, t0);\
    IVI_SLANT_BFLY(t5, t6, t5, t6, t0); IVI_IREFLECT  (t8, t7, t8, t7, t0);\
    IVI_SLANT_BFLY(t1, t4, t1, t4, t0); IVI_SLANT_BFLY(t2, t3, t2, t3, t0);\
    IVI_SLANT_BFLY(t5, t8, t5, t8, t0); IVI_SLANT_BFLY(t6, t7, t6, t7, t0);\
    d1 = COMPENSATE(t1);\
    d2 = COMPENSATE(t2);\
    d3 = COMPENSATE(t3);\
    d4 = COMPENSATE(t4);\
    d5 = COMPENSATE(t5);\
    d6 = COMPENSATE(t6);\
    d7 = COMPENSATE(t7);\
    d8 = COMPENSATE(t8);}
/**
 * Inverse 4-point slant transform.
 *
 * The four inputs are taken in the order s1, s4, s2, s3. The four results are
 * stored to d1..d4 after being passed through COMPENSATE(), which must be
 * defined by the code expanding this macro. t0..t4 are caller-supplied
 * integer scratch lvalues; t0 is handed to every helper step as its temporary.
 *
 * The transform is one IVI_SLANT_BFLY() on (s1, s2) and one IVI_IREFLECT()
 * on (s4, s3), followed by two further IVI_SLANT_BFLY() butterflies.
 */
#define IVI_INV_SLANT4(s1, s4, s2, s3, d1, d2, d3, d4, t0, t1, t2, t3, t4) {\
    IVI_SLANT_BFLY(s1, s2, t1, t2, t0); IVI_IREFLECT  (s4, s3, t4, t3, t0);\
\
    IVI_SLANT_BFLY(t1, t4, t1, t4, t0); IVI_SLANT_BFLY(t2, t3, t2, t3, t0);\
    d1 = COMPENSATE(t1);\
    d2 = COMPENSATE(t2);\
    d3 = COMPENSATE(t3);\
    d4 = COMPENSATE(t4);}
379 void ff_ivi_inverse_slant_8x8(const int32_t *in
, int16_t *out
, uint32_t pitch
, const uint8_t *flags
)
385 int t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
387 #define COMPENSATE(x) (x)
390 for (i
= 0; i
< 8; i
++) {
392 IVI_INV_SLANT8(src
[0], src
[8], src
[16], src
[24], src
[32], src
[40], src
[48], src
[56],
393 dst
[0], dst
[8], dst
[16], dst
[24], dst
[32], dst
[40], dst
[48], dst
[56],
394 t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
);
396 dst
[0] = dst
[8] = dst
[16] = dst
[24] = dst
[32] = dst
[40] = dst
[48] = dst
[56] = 0;
403 #define COMPENSATE(x) ((x + 1)>>1)
405 for (i
= 0; i
< 8; i
++) {
406 if (!src
[0] && !src
[1] && !src
[2] && !src
[3] && !src
[4] && !src
[5] && !src
[6] && !src
[7]) {
407 memset(out
, 0, 8*sizeof(out
[0]));
409 IVI_INV_SLANT8(src
[0], src
[1], src
[2], src
[3], src
[4], src
[5], src
[6], src
[7],
410 out
[0], out
[1], out
[2], out
[3], out
[4], out
[5], out
[6], out
[7],
411 t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
);
419 void ff_ivi_inverse_slant_4x4(const int32_t *in
, int16_t *out
, uint32_t pitch
, const uint8_t *flags
)
425 int t0
, t1
, t2
, t3
, t4
;
427 #define COMPENSATE(x) (x)
430 for (i
= 0; i
< 4; i
++) {
432 IVI_INV_SLANT4(src
[0], src
[4], src
[8], src
[12],
433 dst
[0], dst
[4], dst
[8], dst
[12],
436 dst
[0] = dst
[4] = dst
[8] = dst
[12] = 0;
443 #define COMPENSATE(x) ((x + 1)>>1)
445 for (i
= 0; i
< 4; i
++) {
446 if (!src
[0] && !src
[1] && !src
[2] && !src
[3]) {
447 out
[0] = out
[1] = out
[2] = out
[3] = 0;
449 IVI_INV_SLANT4(src
[0], src
[1], src
[2], src
[3],
450 out
[0], out
[1], out
[2], out
[3],
459 void ff_ivi_dc_slant_2d(const int32_t *in
, int16_t *out
, uint32_t pitch
, int blk_size
)
464 dc_coeff
= (*in
+ 1) >> 1;
466 for (y
= 0; y
< blk_size
; out
+= pitch
, y
++) {
467 for (x
= 0; x
< blk_size
; x
++)
472 void ff_ivi_row_slant8(const int32_t *in
, int16_t *out
, uint32_t pitch
, const uint8_t *flags
)
475 int t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
477 #define COMPENSATE(x) ((x + 1)>>1)
478 for (i
= 0; i
< 8; i
++) {
479 if (!in
[0] && !in
[1] && !in
[2] && !in
[3] && !in
[4] && !in
[5] && !in
[6] && !in
[7]) {
480 memset(out
, 0, 8*sizeof(out
[0]));
482 IVI_INV_SLANT8( in
[0], in
[1], in
[2], in
[3], in
[4], in
[5], in
[6], in
[7],
483 out
[0], out
[1], out
[2], out
[3], out
[4], out
[5], out
[6], out
[7],
484 t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
);
492 void ff_ivi_dc_row_slant(const int32_t *in
, int16_t *out
, uint32_t pitch
, int blk_size
)
497 dc_coeff
= (*in
+ 1) >> 1;
499 for (x
= 0; x
< blk_size
; x
++)
504 for (y
= 1; y
< blk_size
; out
+= pitch
, y
++) {
505 for (x
= 0; x
< blk_size
; x
++)
510 void ff_ivi_col_slant8(const int32_t *in
, int16_t *out
, uint32_t pitch
, const uint8_t *flags
)
512 int i
, row2
, row4
, row8
;
513 int t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
519 #define COMPENSATE(x) ((x + 1)>>1)
520 for (i
= 0; i
< 8; i
++) {
522 IVI_INV_SLANT8(in
[0], in
[8], in
[16], in
[24], in
[32], in
[40], in
[48], in
[56],
523 out
[0], out
[pitch
], out
[row2
], out
[row2
+ pitch
], out
[row4
],
524 out
[row4
+ pitch
], out
[row4
+ row2
], out
[row8
- pitch
],
525 t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
);
527 out
[0] = out
[pitch
] = out
[row2
] = out
[row2
+ pitch
] = out
[row4
] =
528 out
[row4
+ pitch
] = out
[row4
+ row2
] = out
[row8
- pitch
] = 0;
537 void ff_ivi_dc_col_slant(const int32_t *in
, int16_t *out
, uint32_t pitch
, int blk_size
)
542 dc_coeff
= (*in
+ 1) >> 1;
544 for (y
= 0; y
< blk_size
; out
+= pitch
, y
++) {
546 for (x
= 1; x
< blk_size
; x
++)
551 void ff_ivi_put_pixels_8x8(const int32_t *in
, int16_t *out
, uint32_t pitch
,
552 const uint8_t *flags
)
556 for (y
= 0; y
< 8; out
+= pitch
, in
+= 8, y
++)
557 for (x
= 0; x
< 8; x
++)
561 void ff_ivi_put_dc_pixel_8x8(const int32_t *in
, int16_t *out
, uint32_t pitch
,
567 memset(out
+ 1, 0, 7*sizeof(out
[0]));
570 for (y
= 1; y
< 8; out
+= pitch
, y
++)
571 memset(out
, 0, 8*sizeof(out
[0]));
574 #define IVI_MC_TEMPLATE(size, suffix, OP) \
575 void ff_ivi_mc_ ## size ##x## size ## suffix (int16_t *buf, const int16_t *ref_buf, \
576 uint32_t pitch, int mc_type) \
579 const int16_t *wptr; \
582 case 0: /* fullpel (no interpolation) */ \
583 for (i = 0; i < size; i++, buf += pitch, ref_buf += pitch) { \
584 for (j = 0; j < size; j++) {\
585 OP(buf[j], ref_buf[j]); \
589 case 1: /* horizontal halfpel interpolation */ \
590 for (i = 0; i < size; i++, buf += pitch, ref_buf += pitch) \
591 for (j = 0; j < size; j++) \
592 OP(buf[j], (ref_buf[j] + ref_buf[j+1]) >> 1); \
594 case 2: /* vertical halfpel interpolation */ \
595 wptr = ref_buf + pitch; \
596 for (i = 0; i < size; i++, buf += pitch, wptr += pitch, ref_buf += pitch) \
597 for (j = 0; j < size; j++) \
598 OP(buf[j], (ref_buf[j] + wptr[j]) >> 1); \
600 case 3: /* vertical and horizontal halfpel interpolation */ \
601 wptr = ref_buf + pitch; \
602 for (i = 0; i < size; i++, buf += pitch, wptr += pitch, ref_buf += pitch) \
603 for (j = 0; j < size; j++) \
604 OP(buf[j], (ref_buf[j] + ref_buf[j+1] + wptr[j] + wptr[j+1]) >> 2); \
/* Pixel operators handed to IVI_MC_TEMPLATE as its OP argument:
 * OP_PUT stores b into a, OP_ADD accumulates b onto a.
 * The expansions are fully parenthesised so they stay a single expression
 * wherever they are used. */
#define OP_PUT(a, b)  ((a) = (b))
#define OP_ADD(a, b)  ((a) += (b))
612 IVI_MC_TEMPLATE(8, _no_delta
, OP_PUT
)
613 IVI_MC_TEMPLATE(8, _delta
, OP_ADD
)
614 IVI_MC_TEMPLATE(4, _no_delta
, OP_PUT
)
615 IVI_MC_TEMPLATE(4, _delta
, OP_ADD
)