2 Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "cpudetect.h"
34 #include "img_format.h"
37 #include "libvo/fastmemcpy.h"
39 #define MAX_NOISE 4096
40 #define MAX_SHIFT 1024
41 #define MAX_RES (MAX_NOISE-MAX_SHIFT)
43 //===========================================================================//
/* Forward declaration of the portable line-noise routine; it is used as the
 * initial value of the lineNoise function pointer and is called by the MMX
 * variants in this file to handle the samples left over after the 8-byte
 * aligned part of a line. */
45 static inline void lineNoise_C(uint8_t *dst
, uint8_t *src
, int8_t *noise
, int len
, int shift
);
/* Forward declaration of the portable averaged-noise routine; it is used as
 * the initial value of the lineNoiseAvg function pointer and is called by
 * lineNoiseAvg_MMX for the leftover samples of a line. `shift` is indexed
 * as shift[0..2] by the implementations in this file. */
46 static inline void lineNoiseAvg_C(uint8_t *dst
, uint8_t *src
, int len
, int8_t **shift
);
/* Dispatch pointer for the per-line noise routine. It starts out pointing at
 * the C implementation; later in this file it is reassigned to lineNoise_MMX
 * (guarding condition not visible in this excerpt) and to lineNoise_MMX2 when
 * gCpuCaps.hasMMX2 is set. */
48 static void (*lineNoise
)(uint8_t *dst
, uint8_t *src
, int8_t *noise
, int len
, int shift
)= lineNoise_C
;
/* Dispatch pointer for the averaged per-line noise routine. It starts out
 * pointing at the C implementation; later in this file it is reassigned to
 * lineNoiseAvg_MMX (guarding condition not visible in this excerpt). */
49 static void (*lineNoiseAvg
)(uint8_t *dst
, uint8_t *src
, int len
, int8_t **shift
)= lineNoiseAvg_C
;
51 typedef struct FilterParam
{
60 int8_t *prev_shift
[MAX_RES
][3];
64 FilterParam lumaParam
;
65 FilterParam chromaParam
;
69 static int nonTempRandShift_init
;
70 static int nonTempRandShift
[MAX_RES
];
72 static int patt
[4] = {
/* Pseudo-random integer in [0, range) derived from rand().
 * The argument is parenthesised so that an expression such as RAND_N(a+b)
 * scales the whole sum; without the parentheses the cast binds to the first
 * term only and the remaining integer product with rand() can overflow. */
#define RAND_N(range) ((int) ((double)(range)*rand()/(RAND_MAX+1.0)))
77 static int8_t *initNoise(FilterParam
*fp
){
78 int strength
= fp
->strength
;
79 int uniform
= fp
->uniform
;
80 int averaged
= fp
->averaged
;
81 int pattern
= fp
->pattern
;
82 int8_t *noise
= memalign(16, MAX_NOISE
*sizeof(int8_t));
87 for(i
=0,j
=0; i
<MAX_NOISE
; i
++,j
++)
92 noise
[i
]= (RAND_N(strength
) - strength
/2)/6
93 +patt
[j
%4]*strength
*0.25/3;
95 noise
[i
]= (RAND_N(strength
) - strength
/2)/3;
99 noise
[i
]= (RAND_N(strength
) - strength
/2)/2
100 + patt
[j
%4]*strength
*0.25;
102 noise
[i
]= RAND_N(strength
) - strength
/2;
106 double x1
, x2
, w
, y1
;
108 x1
= 2.0 * rand()/(float)RAND_MAX
- 1.0;
109 x2
= 2.0 * rand()/(float)RAND_MAX
- 1.0;
110 w
= x1
* x1
+ x2
* x2
;
111 } while ( w
>= 1.0 );
113 w
= sqrt( (-2.0 * log( w
) ) / w
);
115 y1
*= strength
/ sqrt(3.0);
118 y1
+= patt
[j
%4]*strength
*0.35;
120 if (y1
<-128) y1
=-128;
121 else if(y1
> 127) y1
= 127;
122 if (averaged
) y1
/= 3.0;
125 if (RAND_N(6) == 0) j
--;
129 for (i
= 0; i
< MAX_RES
; i
++)
130 for (j
= 0; j
< 3; j
++)
131 fp
->prev_shift
[i
][j
] = noise
+ (rand()&(MAX_SHIFT
-1));
133 if(!nonTempRandShift_init
){
134 for(i
=0; i
<MAX_RES
; i
++){
135 nonTempRandShift
[i
]= rand()&(MAX_SHIFT
-1);
137 nonTempRandShift_init
= 1;
145 /***************************************************************************/
148 static inline void lineNoise_MMX(uint8_t *dst
, uint8_t *src
, int8_t *noise
, int len
, int shift
){
149 long mmx_len
= len
&(~7);
153 "mov %3, %%"REG_a
" \n\t"
154 "pcmpeqb %%mm7, %%mm7 \n\t"
155 "psllw $15, %%mm7 \n\t"
156 "packsswb %%mm7, %%mm7 \n\t"
159 "movq (%0, %%"REG_a
"), %%mm0 \n\t"
160 "movq (%1, %%"REG_a
"), %%mm1 \n\t"
161 "pxor %%mm7, %%mm0 \n\t"
162 "paddsb %%mm1, %%mm0 \n\t"
163 "pxor %%mm7, %%mm0 \n\t"
164 "movq %%mm0, (%2, %%"REG_a
") \n\t"
165 "add $8, %%"REG_a
" \n\t"
167 :: "r" (src
+mmx_len
), "r" (noise
+mmx_len
), "r" (dst
+mmx_len
), "g" (-mmx_len
)
171 lineNoise_C(dst
+mmx_len
, src
+mmx_len
, noise
+mmx_len
, len
-mmx_len
, 0);
175 //duplicate of previous except movntq
177 static inline void lineNoise_MMX2(uint8_t *dst
, uint8_t *src
, int8_t *noise
, int len
, int shift
){
178 long mmx_len
= len
&(~7);
182 "mov %3, %%"REG_a
" \n\t"
183 "pcmpeqb %%mm7, %%mm7 \n\t"
184 "psllw $15, %%mm7 \n\t"
185 "packsswb %%mm7, %%mm7 \n\t"
188 "movq (%0, %%"REG_a
"), %%mm0 \n\t"
189 "movq (%1, %%"REG_a
"), %%mm1 \n\t"
190 "pxor %%mm7, %%mm0 \n\t"
191 "paddsb %%mm1, %%mm0 \n\t"
192 "pxor %%mm7, %%mm0 \n\t"
193 "movntq %%mm0, (%2, %%"REG_a
") \n\t"
194 "add $8, %%"REG_a
" \n\t"
196 :: "r" (src
+mmx_len
), "r" (noise
+mmx_len
), "r" (dst
+mmx_len
), "g" (-mmx_len
)
200 lineNoise_C(dst
+mmx_len
, src
+mmx_len
, noise
+mmx_len
, len
-mmx_len
, 0);
/**
 * Portable line-noise routine: adds a signed noise value to each unsigned
 * 8-bit sample of a line and saturates the result to 0..255.
 *
 * @param dst   output line, at least len bytes
 * @param src   input line, at least len bytes
 * @param noise signed noise table
 * @param len   number of samples to process; nothing is written when len <= 0
 * @param shift element offset into noise at which reading starts
 *              (NOTE(review): inferred from the callers in this file, which
 *              pass the table base plus a separate shift -- confirm)
 */
static inline void lineNoise_C(uint8_t *dst, uint8_t *src, int8_t *noise, int len, int shift){
	int i;

	noise += shift;
	for(i=0; i<len; i++)
	{
		int v= src[i]+ noise[i];
		if(v>255)	dst[i]=255; //FIXME optimize
		else if(v<0)	dst[i]=0;
		else		dst[i]=v;
	}
}
216 /***************************************************************************/
219 static inline void lineNoiseAvg_MMX(uint8_t *dst
, uint8_t *src
, int len
, int8_t **shift
){
220 long mmx_len
= len
&(~7);
223 "mov %5, %%"REG_a
" \n\t"
226 "movq (%1, %%"REG_a
"), %%mm1 \n\t"
227 "movq (%0, %%"REG_a
"), %%mm0 \n\t"
228 "paddb (%2, %%"REG_a
"), %%mm1 \n\t"
229 "paddb (%3, %%"REG_a
"), %%mm1 \n\t"
230 "movq %%mm0, %%mm2 \n\t"
231 "movq %%mm1, %%mm3 \n\t"
232 "punpcklbw %%mm0, %%mm0 \n\t"
233 "punpckhbw %%mm2, %%mm2 \n\t"
234 "punpcklbw %%mm1, %%mm1 \n\t"
235 "punpckhbw %%mm3, %%mm3 \n\t"
236 "pmulhw %%mm0, %%mm1 \n\t"
237 "pmulhw %%mm2, %%mm3 \n\t"
238 "paddw %%mm1, %%mm1 \n\t"
239 "paddw %%mm3, %%mm3 \n\t"
240 "paddw %%mm0, %%mm1 \n\t"
241 "paddw %%mm2, %%mm3 \n\t"
242 "psrlw $8, %%mm1 \n\t"
243 "psrlw $8, %%mm3 \n\t"
244 "packuswb %%mm3, %%mm1 \n\t"
245 "movq %%mm1, (%4, %%"REG_a
") \n\t"
246 "add $8, %%"REG_a
" \n\t"
248 :: "r" (src
+mmx_len
), "r" (shift
[0]+mmx_len
), "r" (shift
[1]+mmx_len
), "r" (shift
[2]+mmx_len
),
249 "r" (dst
+mmx_len
), "g" (-mmx_len
)
254 int8_t *shift2
[3]={shift
[0]+mmx_len
, shift
[1]+mmx_len
, shift
[2]+mmx_len
};
255 lineNoiseAvg_C(dst
+mmx_len
, src
+mmx_len
, len
-mmx_len
, shift2
);
/**
 * Portable averaged-noise routine: for each sample, sums the corresponding
 * entries of three noise rows and scales the sample by that sum.
 *
 * The source bytes are read through an int8_t pointer, so samples >= 128 are
 * treated as negative values in the arithmetic; the result is stored back
 * truncated to 8 bits.
 *
 * @param dst   output line, at least len bytes
 * @param src   input line, at least len bytes
 * @param len   number of samples to process
 * @param shift array of three noise-row pointers, each at least len entries
 */
static inline void lineNoiseAvg_C(uint8_t *dst, uint8_t *src, int len, int8_t **shift){
	int i;
	int8_t *src2= (int8_t*)src;

	for(i=0; i<len; i++)
	{
		/* n is the combined noise of the three rows for this sample */
		const int n= shift[0][i] + shift[1][i] + shift[2][i];
		/* sample + sample*n/128; relies on arithmetic >> for negative products */
		dst[i]= src2[i]+((n*src2[i])>>7);
	}
}
271 /***************************************************************************/
273 static void noise(uint8_t *dst
, uint8_t *src
, int dstStride
, int srcStride
, int width
, int height
, FilterParam
*fp
){
274 int8_t *noise
= fp
->noise
;
282 if(dstStride
==srcStride
) memcpy(dst
, src
, srcStride
*height
);
285 for(y
=0; y
<height
; y
++)
287 memcpy(dst
, src
, width
);
295 for(y
=0; y
<height
; y
++)
297 if(fp
->temporal
) shift
= rand()&(MAX_SHIFT
-1);
298 else shift
= nonTempRandShift
[y
];
300 if(fp
->quality
==0) shift
&= ~7;
302 lineNoiseAvg(dst
, src
, width
, fp
->prev_shift
[y
]);
303 fp
->prev_shift
[y
][fp
->shiftptr
] = noise
+ shift
;
305 lineNoise(dst
, src
, noise
, width
, shift
);
311 if (fp
->shiftptr
== 3) fp
->shiftptr
= 0;
314 static int config(struct vf_instance_s
* vf
,
315 int width
, int height
, int d_width
, int d_height
,
316 unsigned int flags
, unsigned int outfmt
){
318 return vf_next_config(vf
,width
,height
,d_width
,d_height
,flags
,outfmt
);
321 static void get_image(struct vf_instance_s
* vf
, mp_image_t
*mpi
){
322 if(mpi
->flags
&MP_IMGFLAG_PRESERVE
) return; // don't change
323 if(mpi
->imgfmt
!=vf
->priv
->outfmt
) return; // colorspace differ
324 // ok, we can do pp in-place (or pp disabled):
325 vf
->dmpi
=vf_get_image(vf
->next
,mpi
->imgfmt
,
326 mpi
->type
, mpi
->flags
, mpi
->w
, mpi
->h
);
327 mpi
->planes
[0]=vf
->dmpi
->planes
[0];
328 mpi
->stride
[0]=vf
->dmpi
->stride
[0];
329 mpi
->width
=vf
->dmpi
->width
;
330 if(mpi
->flags
&MP_IMGFLAG_PLANAR
){
331 mpi
->planes
[1]=vf
->dmpi
->planes
[1];
332 mpi
->planes
[2]=vf
->dmpi
->planes
[2];
333 mpi
->stride
[1]=vf
->dmpi
->stride
[1];
334 mpi
->stride
[2]=vf
->dmpi
->stride
[2];
336 mpi
->flags
|=MP_IMGFLAG_DIRECT
;
339 static int put_image(struct vf_instance_s
* vf
, mp_image_t
*mpi
, double pts
){
342 if(!(mpi
->flags
&MP_IMGFLAG_DIRECT
)){
343 // no DR, so get a new image! hope we'll get DR buffer:
344 vf
->dmpi
=vf_get_image(vf
->next
,vf
->priv
->outfmt
,
345 MP_IMGTYPE_TEMP
, MP_IMGFLAG_ACCEPT_STRIDE
,
349 //else printf("dr\n");
352 noise(dmpi
->planes
[0], mpi
->planes
[0], dmpi
->stride
[0], mpi
->stride
[0], mpi
->w
, mpi
->h
, &vf
->priv
->lumaParam
);
353 noise(dmpi
->planes
[1], mpi
->planes
[1], dmpi
->stride
[1], mpi
->stride
[1], mpi
->w
/2, mpi
->h
/2, &vf
->priv
->chromaParam
);
354 noise(dmpi
->planes
[2], mpi
->planes
[2], dmpi
->stride
[2], mpi
->stride
[2], mpi
->w
/2, mpi
->h
/2, &vf
->priv
->chromaParam
);
356 vf_clone_mpi_attributes(dmpi
, mpi
);
359 if(gCpuCaps
.hasMMX
) asm volatile ("emms\n\t");
362 if(gCpuCaps
.hasMMX2
) asm volatile ("sfence\n\t");
365 return vf_next_put_image(vf
,dmpi
, pts
);
368 static void uninit(struct vf_instance_s
* vf
){
369 if(!vf
->priv
) return;
371 if(vf
->priv
->chromaParam
.noise
) free(vf
->priv
->chromaParam
.noise
);
372 vf
->priv
->chromaParam
.noise
= NULL
;
374 if(vf
->priv
->lumaParam
.noise
) free(vf
->priv
->lumaParam
.noise
);
375 vf
->priv
->lumaParam
.noise
= NULL
;
381 //===========================================================================//
383 static int query_format(struct vf_instance_s
* vf
, unsigned int fmt
){
389 return vf_next_query_format(vf
,vf
->priv
->outfmt
);
394 static void parse(FilterParam
*fp
, char* args
){
396 char *max
= strchr(args
, ':');
398 if(!max
) max
= args
+ strlen(args
);
400 fp
->strength
= atoi(args
);
401 pos
= strchr(args
, 'u');
402 if(pos
&& pos
<max
) fp
->uniform
=1;
403 pos
= strchr(args
, 't');
404 if(pos
&& pos
<max
) fp
->temporal
=1;
405 pos
= strchr(args
, 'h');
406 if(pos
&& pos
<max
) fp
->quality
=1;
407 pos
= strchr(args
, 'p');
408 if(pos
&& pos
<max
) fp
->pattern
=1;
409 pos
= strchr(args
, 'a');
415 if(fp
->strength
) initNoise(fp
);
418 static unsigned int fmt_list
[]={
425 static int open(vf_instance_t
*vf
, char* args
){
427 vf
->put_image
=put_image
;
428 vf
->get_image
=get_image
;
429 vf
->query_format
=query_format
;
431 vf
->priv
=malloc(sizeof(struct vf_priv_s
));
432 memset(vf
->priv
, 0, sizeof(struct vf_priv_s
));
435 char *arg2
= strchr(args
,':');
436 if(arg2
) parse(&vf
->priv
->chromaParam
, arg2
+1);
437 parse(&vf
->priv
->lumaParam
, args
);
441 vf
->priv
->outfmt
=vf_match_csp(&vf
->next
,fmt_list
,IMGFMT_YV12
);
442 if(!vf
->priv
->outfmt
)
445 return 0; // no csp match :(
451 lineNoise
= lineNoise_MMX
;
452 lineNoiseAvg
= lineNoiseAvg_MMX
;
456 if(gCpuCaps
.hasMMX2
) lineNoise
= lineNoise_MMX2
;
457 // if(gCpuCaps.hasMMX) lineNoiseAvg= lineNoiseAvg_MMX2;
463 vf_info_t vf_info_noise
= {
466 "Michael Niedermayer",
472 //===========================================================================//