Eliminate use of SSE instruction in loop filter MMX code by replacing
[xiph/unicode.git] / theora / lib / enc / pp.c
blobbdd9b6ce684c7b77d4dbf0994af85a09a3086067
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
9 * by the Xiph.Org Foundation http://www.xiph.org/ *
10 * *
11 ********************************************************************
13 function:
14 last mod: $Id$
16 ********************************************************************/
18 #include <stdlib.h>
19 #include <string.h>
20 #include "codec_internal.h"
21 #include "pp.h"
22 #include "dsp.h"
/* Two-argument maximum / minimum.
   Every use of an argument is parenthesized so that an argument containing
   an operator of lower precedence than the comparison (for example |, & or
   ?:) is compared as a whole instead of being re-associated.
   Each argument may still be evaluated twice, so do not pass expressions
   with side effects. */
#define MAX(a, b) (((a)>(b))?(a):(b))
#define MIN(a, b) (((a)<(b))?(a):(b))
#define PP_QUALITY_THRESH 49
28 static const ogg_int32_t SharpenModifier[ Q_TABLE_SIZE ] =
29 { -12, -11, -10, -10, -9, -9, -9, -9,
30 -6, -6, -6, -6, -6, -6, -6, -6,
31 -4, -4, -4, -4, -4, -4, -4, -4,
32 -2, -2, -2, -2, -2, -2, -2, -2,
33 -2, -2, -2, -2, -2, -2, -2, -2,
34 0, 0, 0, 0, 0, 0, 0, 0,
35 0, 0, 0, 0, 0, 0, 0, 0,
36 0, 0, 0, 0, 0, 0, 0, 0
39 static const ogg_uint32_t DcQuantScaleV1[ Q_TABLE_SIZE ] = {
40 22, 20, 19, 18, 17, 17, 16, 16,
41 15, 15, 14, 14, 13, 13, 12, 12,
42 11, 11, 10, 10, 9, 9, 9, 8,
43 8, 8, 7, 7, 7, 6, 6, 6,
44 6, 5, 5, 5, 5, 4, 4, 4,
45 4, 4, 3, 3, 3, 3, 3, 3,
46 3, 2, 2, 2, 2, 2, 2, 2,
47 2, 1, 1, 1, 1, 1, 1, 1
50 static const ogg_uint32_t * const DeringModifierV1=DcQuantScaleV1;
52 static void PClearFrameInfo(PP_INSTANCE * ppi){
53 int i;
55 if(ppi->ScanPixelIndexTable) _ogg_free(ppi->ScanPixelIndexTable);
56 ppi->ScanPixelIndexTable=0;
58 if(ppi->ScanDisplayFragments) _ogg_free(ppi->ScanDisplayFragments);
59 ppi->ScanDisplayFragments=0;
61 for(i = 0 ; i < MAX_PREV_FRAMES ; i ++)
62 if(ppi->PrevFragments[i]){
63 _ogg_free(ppi->PrevFragments[i]);
64 ppi->PrevFragments[i]=0;
67 if(ppi->FragScores) _ogg_free(ppi->FragScores);
68 ppi->FragScores=0;
70 if(ppi->SameGreyDirPixels) _ogg_free(ppi->SameGreyDirPixels);
71 ppi->SameGreyDirPixels=0;
73 if(ppi->FragDiffPixels) _ogg_free(ppi->FragDiffPixels);
74 ppi->FragDiffPixels=0;
76 if(ppi->BarBlockMap) _ogg_free(ppi->BarBlockMap);
77 ppi->BarBlockMap=0;
79 if(ppi->TmpCodedMap) _ogg_free(ppi->TmpCodedMap);
80 ppi->TmpCodedMap=0;
82 if(ppi->RowChangedPixels) _ogg_free(ppi->RowChangedPixels);
83 ppi->RowChangedPixels=0;
85 if(ppi->PixelScores) _ogg_free(ppi->PixelScores);
86 ppi->PixelScores=0;
88 if(ppi->PixelChangedMap) _ogg_free(ppi->PixelChangedMap);
89 ppi->PixelChangedMap=0;
91 if(ppi->ChLocals) _ogg_free(ppi->ChLocals);
92 ppi->ChLocals=0;
94 if(ppi->yuv_differences) _ogg_free(ppi->yuv_differences);
95 ppi->yuv_differences=0;
99 void PInitFrameInfo(PP_INSTANCE * ppi){
100 int i;
101 PClearFrameInfo(ppi);
103 ppi->ScanPixelIndexTable =
104 _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->ScanPixelIndexTable));
106 ppi->ScanDisplayFragments =
107 _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->ScanDisplayFragments));
109 for(i = 0 ; i < MAX_PREV_FRAMES ; i ++)
110 ppi->PrevFragments[i] =
111 _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->PrevFragments));
113 ppi->FragScores =
114 _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->FragScores));
116 ppi->SameGreyDirPixels =
117 _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->SameGreyDirPixels));
119 ppi->FragDiffPixels =
120 _ogg_malloc(ppi->ScanFrameFragments*sizeof(*ppi->FragScores));
122 ppi->BarBlockMap=
123 _ogg_malloc(3 * ppi->ScanHFragments*sizeof(*ppi->BarBlockMap));
125 ppi->TmpCodedMap =
126 _ogg_malloc(ppi->ScanHFragments*sizeof(*ppi->TmpCodedMap));
128 ppi->RowChangedPixels =
129 _ogg_malloc(3 * ppi->ScanConfig.VideoFrameHeight*
130 sizeof(*ppi->RowChangedPixels));
132 ppi->PixelScores =
133 _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
134 sizeof(*ppi->PixelScores) * PSCORE_CB_ROWS);
136 ppi->PixelChangedMap =
137 _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
138 sizeof(*ppi->PixelChangedMap) * PMAP_CB_ROWS);
140 ppi->ChLocals =
141 _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
142 sizeof(*ppi->ChLocals) * CHLOCALS_CB_ROWS);
144 ppi->yuv_differences =
145 _ogg_malloc(ppi->ScanConfig.VideoFrameWidth*
146 sizeof(*ppi->yuv_differences) * YDIFF_CB_ROWS);
149 void ClearPPInstance(PP_INSTANCE *ppi){
150 PClearFrameInfo(ppi);
154 void InitPPInstance(PP_INSTANCE *ppi, DspFunctions *funcs){
156 memset(ppi,0,sizeof(*ppi));
158 memcpy(&ppi->dsp, funcs, sizeof(DspFunctions));
160 /* Initializations */
161 ppi->PrevFrameLimit = 3; /* Must not exceed MAX_PREV_FRAMES (Note
162 that this number includes the current
163 frame so "1 = no effect") */
165 /* Scan control variables. */
166 ppi->HFragPixels = 8;
167 ppi->VFragPixels = 8;
169 ppi->SRFGreyThresh = 4;
170 ppi->SRFColThresh = 5;
171 ppi->NoiseSupLevel = 3;
172 ppi->SgcLevelThresh = 3;
173 ppi->SuvcLevelThresh = 4;
175 /* Variables controlling S.A.D. breakouts. */
176 ppi->GrpLowSadThresh = 10;
177 ppi->GrpHighSadThresh = 64;
178 ppi->PrimaryBlockThreshold = 5;
179 ppi->SgcThresh = 16; /* (Default values for 8x8 blocks). */
181 ppi->UVBlockThreshCorrection = 1.25;
182 ppi->UVSgcCorrection = 1.5;
184 ppi->MaxLineSearchLen = MAX_SEARCH_LINE_LEN;
187 static void DeringBlockStrong(unsigned char *SrcPtr,
188 unsigned char *DstPtr,
189 ogg_int32_t Pitch,
190 ogg_uint32_t FragQIndex,
191 const ogg_uint32_t *QuantScale){
193 ogg_int16_t UDMod[72];
194 ogg_int16_t LRMod[72];
195 unsigned int j,k,l;
196 const unsigned char * Src;
197 unsigned int QValue = QuantScale[FragQIndex];
199 unsigned char p;
200 unsigned char pl;
201 unsigned char pr;
202 unsigned char pu;
203 unsigned char pd;
205 int al;
206 int ar;
207 int au;
208 int ad;
210 int atot;
211 int B;
212 int newVal;
214 const unsigned char *curRow = SrcPtr - 1; /* avoid negative array indexes */
215 unsigned char *dstRow = DstPtr;
216 const unsigned char *lastRow = SrcPtr-Pitch;
217 const unsigned char *nextRow = SrcPtr+Pitch;
219 unsigned int rowOffset = 0;
220 unsigned int round = (1<<6);
222 int High;
223 int Low;
224 int TmpMod;
226 int Sharpen = SharpenModifier[FragQIndex];
227 High = 3 * QValue;
228 if(High>32)High=32;
229 Low = 0;
232 /* Initialize the Mod Data */
233 Src = SrcPtr-Pitch;
234 for(k=0;k<9;k++){
235 for(j=0;j<8;j++){
237 TmpMod = 32 + QValue - (abs(Src[j+Pitch]-Src[j]));
239 if(TmpMod< -64)
240 TmpMod = Sharpen;
242 else if(TmpMod<Low)
243 TmpMod = Low;
245 else if(TmpMod>High)
246 TmpMod = High;
248 UDMod[k*8+j] = (ogg_int16_t)TmpMod;
250 Src +=Pitch;
253 Src = SrcPtr-1;
255 for(k=0;k<8;k++){
256 for(j=0;j<9;j++){
257 TmpMod = 32 + QValue - (abs(Src[j+1]-Src[j]));
259 if(TmpMod< -64 )
260 TmpMod = Sharpen;
262 else if(TmpMod<0)
263 TmpMod = Low;
265 else if(TmpMod>High)
266 TmpMod = High;
268 LRMod[k*9+j] = (ogg_int16_t)TmpMod;
270 Src+=Pitch;
273 for(k=0;k<8;k++){
274 /* In the case that this function called with same buffer for
275 source and destination, To keep the c and the mmx version to have
276 consistant results, intermediate buffer is used to store the
277 eight pixel value before writing them to destination
278 (i.e. Overwriting souce for the speical case) */
279 for(l=0;l<8;l++){
281 atot = 128;
282 B = round;
283 p = curRow[ rowOffset +l +1];
285 pl = curRow[ rowOffset +l];
286 al = LRMod[k*9+l];
287 atot -= al;
288 B += al * pl;
290 pu = lastRow[ rowOffset +l];
291 au = UDMod[k*8+l];
292 atot -= au;
293 B += au * pu;
295 pd = nextRow[ rowOffset +l];
296 ad = UDMod[(k+1)*8+l];
297 atot -= ad;
298 B += ad * pd;
300 pr = curRow[ rowOffset +l+2];
301 ar = LRMod[k*9+l+1];
302 atot -= ar;
303 B += ar * pr;
305 newVal = ( atot * p + B) >> 7;
307 dstRow[ rowOffset +l]= clamp255( newVal );
309 rowOffset += Pitch;
313 static void DeringBlockWeak(unsigned char *SrcPtr,
314 unsigned char *DstPtr,
315 ogg_int32_t Pitch,
316 ogg_uint32_t FragQIndex,
317 const ogg_uint32_t *QuantScale){
319 ogg_int16_t UDMod[72];
320 ogg_int16_t LRMod[72];
321 unsigned int j,k;
322 const unsigned char * Src;
323 unsigned int QValue = QuantScale[FragQIndex];
325 unsigned char p;
326 unsigned char pl;
327 unsigned char pr;
328 unsigned char pu;
329 unsigned char pd;
331 int al;
332 int ar;
333 int au;
334 int ad;
336 int atot;
337 int B;
338 int newVal;
340 const unsigned char *curRow = SrcPtr-1;
341 unsigned char *dstRow = DstPtr;
342 const unsigned char *lastRow = SrcPtr-Pitch;
343 const unsigned char *nextRow = SrcPtr+Pitch;
345 unsigned int rowOffset = 0;
346 unsigned int round = (1<<6);
348 int High;
349 int Low;
350 int TmpMod;
351 int Sharpen = SharpenModifier[FragQIndex];
353 High = 3 * QValue;
354 if(High>24)
355 High=24;
356 Low = 0 ;
358 /* Initialize the Mod Data */
359 Src=SrcPtr-Pitch;
360 for(k=0;k<9;k++) {
361 for(j=0;j<8;j++) {
363 TmpMod = 32 + QValue - 2*(abs(Src[j+Pitch]-Src[j]));
365 if(TmpMod< -64)
366 TmpMod = Sharpen;
368 else if(TmpMod<Low)
369 TmpMod = Low;
371 else if(TmpMod>High)
372 TmpMod = High;
374 UDMod[k*8+j] = (ogg_int16_t)TmpMod;
376 Src +=Pitch;
379 Src = SrcPtr-1;
381 for(k=0;k<8;k++){
382 for(j=0;j<9;j++){
383 TmpMod = 32 + QValue - 2*(abs(Src[j+1]-Src[j]));
385 if(TmpMod< -64 )
386 TmpMod = Sharpen;
388 else if(TmpMod<Low)
389 TmpMod = Low;
391 else if(TmpMod>High)
392 TmpMod = High;
394 LRMod[k*9+j] = (ogg_int16_t)TmpMod;
396 Src+=Pitch;
399 for(k=0;k<8;k++) {
400 for(j=0;j<8;j++){
401 atot = 128;
402 B = round;
403 p = curRow[ rowOffset +j+1];
405 pl = curRow[ rowOffset +j];
406 al = LRMod[k*9+j];
407 atot -= al;
408 B += al * pl;
410 pu = lastRow[ rowOffset +j];
411 au = UDMod[k*8+j];
412 atot -= au;
413 B += au * pu;
415 pd = nextRow[ rowOffset +j];
416 ad = UDMod[(k+1)*8+j];
417 atot -= ad;
418 B += ad * pd;
420 pr = curRow[ rowOffset +j+2];
421 ar = LRMod[k*9+j+1];
422 atot -= ar;
423 B += ar * pr;
425 newVal = ( atot * p + B) >> 7;
427 dstRow[ rowOffset +j] = clamp255( newVal );
430 rowOffset += Pitch;
434 static void DeringFrame(PB_INSTANCE *pbi,
435 unsigned char *Src, unsigned char *Dst){
436 ogg_uint32_t col,row;
437 unsigned char *SrcPtr;
438 unsigned char *DestPtr;
439 ogg_uint32_t BlocksAcross,BlocksDown;
440 const ogg_uint32_t *QuantScale;
441 ogg_uint32_t Block;
442 ogg_uint32_t LineLength;
444 ogg_int32_t Thresh1,Thresh2,Thresh3,Thresh4;
446 Thresh1 = 384;
447 Thresh2 = 4 * Thresh1;
448 Thresh3 = 5 * Thresh2/4;
449 Thresh4 = 5 * Thresh2/2;
451 QuantScale = DeringModifierV1;
453 BlocksAcross = pbi->HFragments;
454 BlocksDown = pbi->VFragments;
456 SrcPtr = Src + pbi->ReconYDataOffset;
457 DestPtr = Dst + pbi->ReconYDataOffset;
458 LineLength = pbi->YStride;
460 Block = 0;
462 for ( row = 0 ; row < BlocksDown; row ++){
463 for (col = 0; col < BlocksAcross; col ++){
464 ogg_uint32_t Quality = pbi->FragQIndex[Block];
465 ogg_int32_t Variance = pbi->FragmentVariances[Block];
467 if( pbi->PostProcessingLevel >5 && Variance > Thresh3 ){
468 DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
469 LineLength,Quality,QuantScale);
471 if( (col > 0 &&
472 pbi->FragmentVariances[Block-1] > Thresh4 ) ||
473 (col + 1 < BlocksAcross &&
474 pbi->FragmentVariances[Block+1] > Thresh4 ) ||
475 (row + 1 < BlocksDown &&
476 pbi->FragmentVariances[Block+BlocksAcross] > Thresh4) ||
477 (row > 0 &&
478 pbi->FragmentVariances[Block-BlocksAcross] > Thresh4) ){
480 DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
481 LineLength,Quality,QuantScale);
482 DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
483 LineLength,Quality,QuantScale);
485 } else if(Variance > Thresh2 ) {
487 DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
488 LineLength,Quality,QuantScale);
489 } else if(Variance > Thresh1 ) {
491 DeringBlockWeak(SrcPtr + 8 * col, DestPtr + 8 * col,
492 LineLength,Quality,QuantScale);
494 } else {
496 dsp_copy8x8(pbi->dsp, SrcPtr + 8 * col, DestPtr + 8 * col, LineLength);
500 ++Block;
503 SrcPtr += 8 * LineLength;
504 DestPtr += 8 * LineLength;
507 /* Then U */
509 BlocksAcross /= 2;
510 BlocksDown /= 2;
511 LineLength /= 2;
513 SrcPtr = Src + pbi->ReconUDataOffset;
514 DestPtr = Dst + pbi->ReconUDataOffset;
515 for ( row = 0 ; row < BlocksDown; row ++) {
516 for (col = 0; col < BlocksAcross; col ++) {
517 ogg_uint32_t Quality = pbi->FragQIndex[Block];
518 ogg_int32_t Variance = pbi->FragmentVariances[Block];
520 if( pbi->PostProcessingLevel >5 && Variance > Thresh4 ) {
521 DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
522 LineLength,Quality,QuantScale);
523 DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
524 LineLength,Quality,QuantScale);
525 DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
526 LineLength,Quality,QuantScale);
528 }else if(Variance > Thresh2 ){
529 DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
530 LineLength,Quality,QuantScale);
531 }else if(Variance > Thresh1 ){
532 DeringBlockWeak(SrcPtr + 8 * col, DestPtr + 8 * col,
533 LineLength,Quality,QuantScale);
534 }else{
535 dsp_copy8x8(pbi->dsp, SrcPtr + 8 * col, DestPtr + 8 * col, LineLength);
538 ++Block;
541 SrcPtr += 8 * LineLength;
542 DestPtr += 8 * LineLength;
545 /* Then V */
546 SrcPtr = Src + pbi->ReconVDataOffset;
547 DestPtr = Dst + pbi->ReconVDataOffset;
549 for ( row = 0 ; row < BlocksDown; row ++){
550 for (col = 0; col < BlocksAcross; col ++){
552 ogg_uint32_t Quality = pbi->FragQIndex[Block];
553 ogg_int32_t Variance = pbi->FragmentVariances[Block];
556 if( pbi->PostProcessingLevel >5 && Variance > Thresh4 ) {
557 DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
558 LineLength,Quality,QuantScale);
559 DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
560 LineLength,Quality,QuantScale);
561 DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
562 LineLength,Quality,QuantScale);
564 }else if(Variance > Thresh2 ){
565 DeringBlockStrong(SrcPtr + 8 * col, DestPtr + 8 * col,
566 LineLength,Quality,QuantScale);
567 }else if(Variance > Thresh1 ){
568 DeringBlockWeak(SrcPtr + 8 * col, DestPtr + 8 * col,
569 LineLength,Quality,QuantScale);
570 }else{
571 dsp_copy8x8(pbi->dsp, SrcPtr + 8 * col, DestPtr + 8 * col, LineLength);
574 ++Block;
577 SrcPtr += 8 * LineLength;
578 DestPtr += 8 * LineLength;
584 void UpdateFragQIndex(PB_INSTANCE *pbi){
586 ogg_uint32_t ThisFrameQIndex;
587 ogg_uint32_t i;
589 /* Check this frame quality index */
590 ThisFrameQIndex = pbi->FrameQIndex;
593 /* It is not a key frame, so only reset those are coded */
594 for( i = 0; i < pbi->UnitFragments; i++ )
595 if( pbi->display_fragments[i])
596 pbi->FragQIndex[i] = ThisFrameQIndex;
600 static void DeblockLoopFilteredBand(PB_INSTANCE *pbi,
601 unsigned char *SrcPtr,
602 unsigned char *DesPtr,
603 ogg_uint32_t PlaneLineStep,
604 ogg_uint32_t FragsAcross,
605 ogg_uint32_t StartFrag,
606 const ogg_uint32_t *QuantScale){
607 ogg_uint32_t j,k;
608 ogg_uint32_t CurrentFrag=StartFrag;
609 ogg_int32_t QStep;
610 ogg_int32_t FLimit;
611 unsigned char *Src, *Des;
612 ogg_int32_t x[10];
613 ogg_int32_t Sum1, Sum2;
615 while(CurrentFrag < StartFrag + FragsAcross){
617 Src=SrcPtr+8*(CurrentFrag-StartFrag)-PlaneLineStep*5;
618 Des=DesPtr+8*(CurrentFrag-StartFrag)-PlaneLineStep*4;
620 QStep = QuantScale[pbi->FragQIndex[CurrentFrag+FragsAcross]];
621 FLimit = ( QStep * 3 ) >> 2;
623 for( j=0; j<8 ; j++){
624 x[0] = Src[0];
625 x[1] = Src[PlaneLineStep];
626 x[2] = Src[PlaneLineStep*2];
627 x[3] = Src[PlaneLineStep*3];
628 x[4] = Src[PlaneLineStep*4];
629 x[5] = Src[PlaneLineStep*5];
630 x[6] = Src[PlaneLineStep*6];
631 x[7] = Src[PlaneLineStep*7];
632 x[8] = Src[PlaneLineStep*8];
633 x[9] = Src[PlaneLineStep*9];
635 Sum1=Sum2=0;
637 for(k=1;k<=4;k++){
638 Sum1 += abs(x[k]-x[k-1]);
639 Sum2 += abs(x[k+4]-x[k+5]);
642 pbi->FragmentVariances[CurrentFrag] +=((Sum1>255)?255:Sum1);
643 pbi->FragmentVariances[CurrentFrag + FragsAcross] += ((Sum2>255)?255:Sum2);
645 if( Sum1 < FLimit &&
646 Sum2 < FLimit &&
647 (x[5] - x[4]) < QStep &&
648 (x[4] - x[5]) < QStep ){
650 /* low pass filtering (LPF7: 1 1 1 2 1 1 1) */
651 Des[0 ] = (x[0] + x[0] +x[0] + x[1] * 2 +
652 x[2] + x[3] +x[4] + 4) >> 3;
653 Des[PlaneLineStep ] = (x[0] + x[0] +x[1] + x[2] * 2 +
654 x[3] + x[4] +x[5] + 4) >> 3;
655 Des[PlaneLineStep*2] = (x[0] + x[1] +x[2] + x[3] * 2 +
656 x[4] + x[5] +x[6] + 4) >> 3;
657 Des[PlaneLineStep*3] = (x[1] + x[2] +x[3] + x[4] * 2 +
658 x[5] + x[6] +x[7] + 4) >> 3;
659 Des[PlaneLineStep*4] = (x[2] + x[3] +x[4] + x[5] * 2 +
660 x[6] + x[7] +x[8] + 4) >> 3;
661 Des[PlaneLineStep*5] = (x[3] + x[4] +x[5] + x[6] * 2 +
662 x[7] + x[8] +x[9] + 4) >> 3;
663 Des[PlaneLineStep*6] = (x[4] + x[5] +x[6] + x[7] * 2 +
664 x[8] + x[9] +x[9] + 4) >> 3;
665 Des[PlaneLineStep*7] = (x[5] + x[6] +x[7] + x[8] * 2 +
666 x[9] + x[9] +x[9] + 4) >> 3;
668 }else {
669 /* copy the pixels to destination */
670 Des[0 ]= (unsigned char)x[1];
671 Des[PlaneLineStep ]= (unsigned char)x[2];
672 Des[PlaneLineStep*2]= (unsigned char)x[3];
673 Des[PlaneLineStep*3]= (unsigned char)x[4];
674 Des[PlaneLineStep*4]= (unsigned char)x[5];
675 Des[PlaneLineStep*5]= (unsigned char)x[6];
676 Des[PlaneLineStep*6]= (unsigned char)x[7];
677 Des[PlaneLineStep*7]= (unsigned char)x[8];
679 Src ++;
680 Des ++;
684 /* done with filtering the horizontal edge, now let's do the
685 vertical one */
686 /* skip the first one */
687 if(CurrentFrag==StartFrag)
688 CurrentFrag++;
689 else{
690 Des=DesPtr-8*PlaneLineStep+8*(CurrentFrag-StartFrag);
691 Src=Des-5;
692 Des-=4;
694 QStep = QuantScale[pbi->FragQIndex[CurrentFrag]];
695 FLimit = ( QStep * 3 ) >> 2;
697 for( j=0; j<8 ; j++){
698 x[0] = Src[0];
699 x[1] = Src[1];
700 x[2] = Src[2];
701 x[3] = Src[3];
702 x[4] = Src[4];
703 x[5] = Src[5];
704 x[6] = Src[6];
705 x[7] = Src[7];
706 x[8] = Src[8];
707 x[9] = Src[9];
709 Sum1=Sum2=0;
711 for(k=1;k<=4;k++){
712 Sum1 += abs(x[k]-x[k-1]);
713 Sum2 += abs(x[k+4]-x[k+5]);
716 pbi->FragmentVariances[CurrentFrag-1] += ((Sum1>255)?255:Sum1);
717 pbi->FragmentVariances[CurrentFrag] += ((Sum2>255)?255:Sum2);
719 if( Sum1 < FLimit &&
720 Sum2 < FLimit &&
721 (x[5] - x[4]) < QStep &&
722 (x[4] - x[5]) < QStep ){
724 /* low pass filtering (LPF7: 1 1 1 2 1 1 1) */
725 Des[0] = (x[0] + x[0] +x[0] + x[1] * 2 + x[2] + x[3] +x[4] + 4) >> 3;
726 Des[1] = (x[0] + x[0] +x[1] + x[2] * 2 + x[3] + x[4] +x[5] + 4) >> 3;
727 Des[2] = (x[0] + x[1] +x[2] + x[3] * 2 + x[4] + x[5] +x[6] + 4) >> 3;
728 Des[3] = (x[1] + x[2] +x[3] + x[4] * 2 + x[5] + x[6] +x[7] + 4) >> 3;
729 Des[4] = (x[2] + x[3] +x[4] + x[5] * 2 + x[6] + x[7] +x[8] + 4) >> 3;
730 Des[5] = (x[3] + x[4] +x[5] + x[6] * 2 + x[7] + x[8] +x[9] + 4) >> 3;
731 Des[6] = (x[4] + x[5] +x[6] + x[7] * 2 + x[8] + x[9] +x[9] + 4) >> 3;
732 Des[7] = (x[5] + x[6] +x[7] + x[8] * 2 + x[9] + x[9] +x[9] + 4) >> 3;
735 Src += PlaneLineStep;
736 Des += PlaneLineStep;
738 CurrentFrag ++;
743 static void DeblockVerticalEdgesInLoopFilteredBand(PB_INSTANCE *pbi,
744 unsigned char *SrcPtr,
745 unsigned char *DesPtr,
746 ogg_uint32_t PlaneLineStep,
747 ogg_uint32_t FragsAcross,
748 ogg_uint32_t StartFrag,
749 const ogg_uint32_t *QuantScale){
750 ogg_uint32_t j,k;
751 ogg_uint32_t CurrentFrag=StartFrag;
752 ogg_int32_t QStep;
753 ogg_int32_t FLimit;
754 unsigned char *Src, *Des;
755 ogg_int32_t x[10];
756 ogg_int32_t Sum1, Sum2;
758 while(CurrentFrag < StartFrag + FragsAcross-1) {
760 Src=SrcPtr+8*(CurrentFrag-StartFrag+1)-5;
761 Des=DesPtr+8*(CurrentFrag-StartFrag+1)-4;
763 QStep = QuantScale[pbi->FragQIndex[CurrentFrag+1]];
764 FLimit = ( QStep * 3)>>2 ;
766 for( j=0; j<8 ; j++){
767 x[0] = Src[0];
768 x[1] = Src[1];
769 x[2] = Src[2];
770 x[3] = Src[3];
771 x[4] = Src[4];
772 x[5] = Src[5];
773 x[6] = Src[6];
774 x[7] = Src[7];
775 x[8] = Src[8];
776 x[9] = Src[9];
778 Sum1=Sum2=0;
780 for(k=1;k<=4;k++){
781 Sum1 += abs(x[k]-x[k-1]);
782 Sum2 += abs(x[k+4]-x[k+5]);
785 pbi->FragmentVariances[CurrentFrag] += ((Sum1>255)?255:Sum1);
786 pbi->FragmentVariances[CurrentFrag+1] += ((Sum2>255)?255:Sum2);
789 if( Sum1 < FLimit &&
790 Sum2 < FLimit &&
791 (x[5] - x[4]) < QStep &&
792 (x[4] - x[5]) < QStep ){
794 /* low pass filtering (LPF7: 1 1 1 2 1 1 1) */
795 Des[0] = (x[0] + x[0] +x[0] + x[1] * 2 + x[2] + x[3] +x[4] + 4) >> 3;
796 Des[1] = (x[0] + x[0] +x[1] + x[2] * 2 + x[3] + x[4] +x[5] + 4) >> 3;
797 Des[2] = (x[0] + x[1] +x[2] + x[3] * 2 + x[4] + x[5] +x[6] + 4) >> 3;
798 Des[3] = (x[1] + x[2] +x[3] + x[4] * 2 + x[5] + x[6] +x[7] + 4) >> 3;
799 Des[4] = (x[2] + x[3] +x[4] + x[5] * 2 + x[6] + x[7] +x[8] + 4) >> 3;
800 Des[5] = (x[3] + x[4] +x[5] + x[6] * 2 + x[7] + x[8] +x[9] + 4) >> 3;
801 Des[6] = (x[4] + x[5] +x[6] + x[7] * 2 + x[8] + x[9] +x[9] + 4) >> 3;
802 Des[7] = (x[5] + x[6] +x[7] + x[8] * 2 + x[9] + x[9] +x[9] + 4) >> 3;
804 Src +=PlaneLineStep;
805 Des +=PlaneLineStep;
808 CurrentFrag ++;
812 static void DeblockPlane(PB_INSTANCE *pbi,
813 unsigned char *SourceBuffer,
814 unsigned char *DestinationBuffer,
815 ogg_uint32_t Channel ){
817 ogg_uint32_t i,k;
818 ogg_uint32_t PlaneLineStep=0;
819 ogg_uint32_t StartFrag =0;
820 ogg_uint32_t PixelIndex=0;
821 unsigned char * SrcPtr=0, * DesPtr=0;
822 ogg_uint32_t FragsAcross=0;
823 ogg_uint32_t FragsDown=0;
824 const ogg_uint32_t *QuantScale=0;
826 switch( Channel ){
827 case 0:
828 /* Get the parameters */
829 PlaneLineStep = pbi->YStride;
830 FragsAcross = pbi->HFragments;
831 FragsDown = pbi->VFragments;
832 StartFrag = 0;
833 PixelIndex = pbi->ReconYDataOffset;
834 SrcPtr = & SourceBuffer[PixelIndex];
835 DesPtr = & DestinationBuffer[PixelIndex];
836 break;
838 case 1:
839 /* Get the parameters */
840 PlaneLineStep = pbi->UVStride;
841 FragsAcross = pbi->HFragments / 2;
842 FragsDown = pbi->VFragments / 2;
843 StartFrag = pbi->YPlaneFragments;
845 PixelIndex = pbi->ReconUDataOffset;
846 SrcPtr = & SourceBuffer[PixelIndex];
847 DesPtr = & DestinationBuffer[PixelIndex];
848 break;
850 default:
851 /* Get the parameters */
852 PlaneLineStep = pbi->UVStride;
853 FragsAcross = pbi->HFragments / 2;
854 FragsDown = pbi->VFragments / 2;
855 StartFrag = pbi->YPlaneFragments + pbi->UVPlaneFragments;
857 PixelIndex = pbi->ReconVDataOffset;
858 SrcPtr = & SourceBuffer[PixelIndex];
859 DesPtr = & DestinationBuffer[PixelIndex];
860 break;
863 QuantScale = DcQuantScaleV1;
865 for(i=0;i<4;i++)
866 memcpy(DesPtr+i*PlaneLineStep, SrcPtr+i*PlaneLineStep, PlaneLineStep);
868 k = 1;
870 while( k < FragsDown ){
872 SrcPtr += 8*PlaneLineStep;
873 DesPtr += 8*PlaneLineStep;
875 /* Filter both the horizontal and vertical block edges inside the band */
876 DeblockLoopFilteredBand(pbi, SrcPtr, DesPtr, PlaneLineStep,
877 FragsAcross, StartFrag, QuantScale);
879 /* Move Pointers */
880 StartFrag += FragsAcross;
882 k ++;
885 /* The Last band */
886 for(i=0;i<4;i++)
887 memcpy(DesPtr+(i+4)*PlaneLineStep,
888 SrcPtr+(i+4)*PlaneLineStep,
889 PlaneLineStep);
891 DeblockVerticalEdgesInLoopFilteredBand(pbi,SrcPtr,DesPtr,PlaneLineStep,
892 FragsAcross,StartFrag,QuantScale);
896 static void DeblockFrame(PB_INSTANCE *pbi, unsigned char *SourceBuffer,
897 unsigned char *DestinationBuffer){
899 memset(pbi->FragmentVariances, 0 , sizeof(ogg_int32_t) * pbi->UnitFragments);
902 UpdateFragQIndex(pbi);
904 /* Y */
905 DeblockPlane( pbi, SourceBuffer, DestinationBuffer, 0);
907 /* U */
908 DeblockPlane( pbi, SourceBuffer, DestinationBuffer, 1);
910 /* V */
911 DeblockPlane( pbi, SourceBuffer, DestinationBuffer, 2);
915 void PostProcess(PB_INSTANCE *pbi){
917 switch (pbi->PostProcessingLevel){
918 case 8:
919 /* on a slow machine, use a simpler and faster deblocking filter */
920 DeblockFrame(pbi, pbi->LastFrameRecon,pbi->PostProcessBuffer);
921 break;
923 case 6:
924 DeblockFrame(pbi, pbi->LastFrameRecon,pbi->PostProcessBuffer);
925 UpdateUMVBorder(pbi, pbi->PostProcessBuffer );
926 DeringFrame(pbi, pbi->PostProcessBuffer, pbi->PostProcessBuffer);
927 break;
929 case 5:
930 DeblockFrame(pbi, pbi->LastFrameRecon,pbi->PostProcessBuffer);
931 UpdateUMVBorder(pbi, pbi->PostProcessBuffer );
932 DeringFrame(pbi, pbi->PostProcessBuffer, pbi->PostProcessBuffer);
933 break;
934 case 4:
935 DeblockFrame(pbi, pbi->LastFrameRecon, pbi->PostProcessBuffer);
936 break;
937 case 1:
938 UpdateFragQIndex(pbi);
939 break;
941 case 0:
942 break;
944 default:
945 DeblockFrame(pbi, pbi->LastFrameRecon, pbi->PostProcessBuffer);
946 UpdateUMVBorder(pbi, pbi->PostProcessBuffer );
947 DeringFrame(pbi, pbi->PostProcessBuffer, pbi->PostProcessBuffer);
948 break;