2 Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "cpudetect.h"
33 #include "img_format.h"
36 #include "libvo/fastmemcpy.h"
38 #define MAX_NOISE 4096
39 #define MAX_SHIFT 1024
40 #define MAX_RES (MAX_NOISE-MAX_SHIFT)
42 //===========================================================================//
/* Portable C line kernels; their definitions appear later in this file. */
static inline void lineNoise_C(uint8_t *dst, uint8_t *src, int8_t *noise, int len, int shift);
static inline void lineNoiseAvg_C(uint8_t *dst, uint8_t *src, int len, int8_t **shift);

/*
 * Kernels actually used by the filter, reached through function pointers.
 * Both start out on the C implementations; open() may repoint them at the
 * MMX / MMX2 variants.
 */
static void (*lineNoise)(uint8_t *dst, uint8_t *src, int8_t *noise, int len, int shift) = lineNoise_C;
static void (*lineNoiseAvg)(uint8_t *dst, uint8_t *src, int len, int8_t **shift) = lineNoiseAvg_C;
/* Per-plane noise settings and state.
 * Only prev_shift is visible in this listing; code elsewhere in the file also
 * reads fp->strength, uniform, averaged, pattern, temporal, quality, shiftptr
 * and noise. */
50 typedef struct FilterParam
{
/* One row of three pointers into the noise table per image line: initNoise()
 * fills them with noise + a random offset, and noise() hands row y to
 * lineNoiseAvg() and overwrites slot [shiftptr] with the newest offset. */
59 int8_t *prev_shift
[MAX_RES
][3];
/* Separate parameter sets for the luma plane and for the two chroma planes;
 * they are reached as vf->priv->lumaParam and vf->priv->chromaParam.
 * NOTE(review): the enclosing struct header is not visible in this listing. */
63 FilterParam lumaParam
;
64 FilterParam chromaParam
;
/* Set to 1 by initNoise() once nonTempRandShift[] has been filled. */
68 static int nonTempRandShift_init
;
/* Fixed per-row offsets into the noise table (each rand()&(MAX_SHIFT-1));
 * noise() uses entry [y] when fp->temporal is not set. */
69 static int nonTempRandShift
[MAX_RES
];
/* Four-entry table mixed into the samples by initNoise() as patt[j%4].
 * NOTE(review): the initializer values are not visible in this listing. */
71 static int patt
[4] = {
/* Pseudo-random integer in [0, range), derived from rand().
 * The argument is parenthesized so that an expression such as RAND_N(a+b)
 * scales the whole sum instead of binding the cast and multiply to parts of it. */
#define RAND_N(range) ((int) ((double)(range)*rand()/(RAND_MAX+1.0)))
/* Build the noise table for one FilterParam.
 * Allocates MAX_NOISE signed samples, fills them from rand() according to
 * fp->strength / uniform / averaged / pattern, points every fp->prev_shift
 * slot at a random offset inside the table and, on the first call only,
 * fills the shared nonTempRandShift[] table.
 * NOTE(review): the declared return type is int8_t*, but the return statement
 * and any store to fp->noise are not visible in this listing. */
76 static int8_t *initNoise(FilterParam
*fp
){
77 int strength
= fp
->strength
;
78 int uniform
= fp
->uniform
;
79 int averaged
= fp
->averaged
;
80 int pattern
= fp
->pattern
;
// 16-byte aligned table of MAX_NOISE samples.
// NOTE(review): no NULL check on the memalign() result is visible before noise[] is written.
81 int8_t *noise
= memalign(16, MAX_NOISE
*sizeof(int8_t));
// i walks the table; j indexes patt[] (as j%4) and is occasionally held back (see end of loop).
86 for(i
=0,j
=0; i
<MAX_NOISE
; i
++,j
++)
// Four RAND_N-based formulas follow.
// NOTE(review): the conditions selecting between them are not visible here —
// presumably uniform/averaged/pattern; confirm against the full file.
91 noise
[i
]= (RAND_N(strength
) - strength
/2)/6
92 +patt
[j
%4]*strength
*0.25/3;
94 noise
[i
]= (RAND_N(strength
) - strength
/2)/3;
98 noise
[i
]= (RAND_N(strength
) - strength
/2)/2
99 + patt
[j
%4]*strength
*0.25;
101 noise
[i
]= RAND_N(strength
) - strength
/2;
// Draw (x1, x2) from about [-1, 1] and retry until x1*x1 + x2*x2 < 1.0, then
// rescale by sqrt(-2*log(w)/w). This looks like the polar Box-Muller method;
// the line that first assigns y1 is not visible here (TODO confirm).
105 double x1
, x2
, w
, y1
;
// NOTE(review): RAND_MAX is cast to float, which cannot hold it exactly when
// RAND_MAX needs more than 24 significant bits.
107 x1
= 2.0 * rand()/(float)RAND_MAX
- 1.0;
108 x2
= 2.0 * rand()/(float)RAND_MAX
- 1.0;
109 w
= x1
* x1
+ x2
* x2
;
110 } while ( w
>= 1.0 );
112 w
= sqrt( (-2.0 * log( w
) ) / w
);
114 y1
*= strength
/ sqrt(3.0);
117 y1
+= patt
[j
%4]*strength
*0.35;
// Clamp to the int8_t range before the optional division by 3 for averaged mode.
119 if (y1
<-128) y1
=-128;
120 else if(y1
> 127) y1
= 127;
121 if (averaged
) y1
/= 3.0;
// About one time in six, cancel the loop's j++ so the patt[] phase stays put.
124 if (RAND_N(6) == 0) j
--;
// Seed all three history slots of every row with pointers at random offsets
// (0 .. MAX_SHIFT-1) into the table.
128 for (i
= 0; i
< MAX_RES
; i
++)
129 for (j
= 0; j
< 3; j
++)
130 fp
->prev_shift
[i
][j
] = noise
+ (rand()&(MAX_SHIFT
-1));
// Fill the shared per-row offset table once; later calls skip this.
132 if(!nonTempRandShift_init
){
133 for(i
=0; i
<MAX_RES
; i
++){
134 nonTempRandShift
[i
]= rand()&(MAX_SHIFT
-1);
136 nonTempRandShift_init
= 1;
144 /***************************************************************************/
/* MMX version of lineNoise: adds signed noise bytes to one row of pixels,
 * eight bytes per step, then hands the last (len - mmx_len) bytes to
 * lineNoise_C().
 * Asm operands: %0 = src+mmx_len, %1 = noise+mmx_len, %2 = dst+mmx_len,
 * %3 = -mmx_len (loaded into REG_a and advanced by 8 per step).
 * NOTE(review): the asm statement opener, loop label/branch and clobber list
 * are not visible in this listing, and no use of `shift` is visible. */
147 static inline void lineNoise_MMX(uint8_t *dst
, uint8_t *src
, int8_t *noise
, int len
, int shift
){
// Largest multiple of 8 not exceeding len.
148 long mmx_len
= len
&(~7);
152 "mov %3, %%"REG_a
" \n\t"
// mm7 = 0x80 in every byte: all-ones, words shifted to 0x8000, then packed
// with signed saturation.
153 "pcmpeqb %%mm7, %%mm7 \n\t"
154 "psllw $15, %%mm7 \n\t"
155 "packsswb %%mm7, %%mm7 \n\t"
158 "movq (%0, %%"REG_a
"), %%mm0 \n\t"
159 "movq (%1, %%"REG_a
"), %%mm1 \n\t"
// XOR with 0x80 moves the unsigned pixels into signed range so paddsb can
// saturate; the second XOR moves the result back.
160 "pxor %%mm7, %%mm0 \n\t"
161 "paddsb %%mm1, %%mm0 \n\t"
162 "pxor %%mm7, %%mm0 \n\t"
163 "movq %%mm0, (%2, %%"REG_a
") \n\t"
164 "add $8, %%"REG_a
" \n\t"
166 :: "r" (src
+mmx_len
), "r" (noise
+mmx_len
), "r" (dst
+mmx_len
), "g" (-mmx_len
)
// Remaining tail bytes go through the C kernel with shift 0.
170 lineNoise_C(dst
+mmx_len
, src
+mmx_len
, noise
+mmx_len
, len
-mmx_len
, 0);
174 //duplicate of previous except movntq
/* MMX2 version of lineNoise: same visible instruction sequence as
 * lineNoise_MMX, except that the result is stored with movntq instead of movq.
 * The last (len - mmx_len) bytes are handled by lineNoise_C().
 * Asm operands: %0 = src+mmx_len, %1 = noise+mmx_len, %2 = dst+mmx_len,
 * %3 = -mmx_len (loaded into REG_a and advanced by 8 per step).
 * NOTE(review): the asm statement opener, loop label/branch and clobber list
 * are not visible in this listing, and no use of `shift` is visible. */
176 static inline void lineNoise_MMX2(uint8_t *dst
, uint8_t *src
, int8_t *noise
, int len
, int shift
){
// Largest multiple of 8 not exceeding len.
177 long mmx_len
= len
&(~7);
181 "mov %3, %%"REG_a
" \n\t"
// mm7 = 0x80 in every byte.
182 "pcmpeqb %%mm7, %%mm7 \n\t"
183 "psllw $15, %%mm7 \n\t"
184 "packsswb %%mm7, %%mm7 \n\t"
187 "movq (%0, %%"REG_a
"), %%mm0 \n\t"
188 "movq (%1, %%"REG_a
"), %%mm1 \n\t"
// Bias to signed, saturating signed add, bias back.
189 "pxor %%mm7, %%mm0 \n\t"
190 "paddsb %%mm1, %%mm0 \n\t"
191 "pxor %%mm7, %%mm0 \n\t"
192 "movntq %%mm0, (%2, %%"REG_a
") \n\t"
193 "add $8, %%"REG_a
" \n\t"
195 :: "r" (src
+mmx_len
), "r" (noise
+mmx_len
), "r" (dst
+mmx_len
), "g" (-mmx_len
)
// Remaining tail bytes go through the C kernel with shift 0.
199 lineNoise_C(dst
+mmx_len
, src
+mmx_len
, noise
+mmx_len
, len
-mmx_len
, 0);
/* Portable lineNoise kernel: for each pixel, adds the signed noise sample to
 * the source byte and clamps the sum to 0..255.
 * NOTE(review): the loop header, any use of `shift`, and the store for the
 * in-range case are not visible in this listing. */
203 static inline void lineNoise_C(uint8_t *dst
, uint8_t *src
, int8_t *noise
, int len
, int shift
){
// Sum is computed in int so both overflow directions can be detected.
208 int v
= src
[i
]+ noise
[i
];
209 if(v
>255) dst
[i
]=255; //FIXME optimize
210 else if(v
<0) dst
[i
]=0;
215 /***************************************************************************/
/* MMX version of lineNoiseAvg: combines the pixel row with the sum of three
 * noise rows, eight bytes per step, then hands the last (len - mmx_len) bytes
 * to lineNoiseAvg_C().
 * Asm operands: %0 = src+mmx_len, %1..%3 = shift[0..2]+mmx_len,
 * %4 = dst+mmx_len, %5 = -mmx_len (loaded into REG_a, advanced by 8 per step).
 * NOTE(review): the asm statement opener, loop label/branch and clobber list
 * are not visible in this listing. */
218 static inline void lineNoiseAvg_MMX(uint8_t *dst
, uint8_t *src
, int len
, int8_t **shift
){
// Largest multiple of 8 not exceeding len.
219 long mmx_len
= len
&(~7);
222 "mov %5, %%"REG_a
" \n\t"
// mm1 = bytewise (wrapping) sum of the three noise rows; mm0 = pixels.
225 "movq (%1, %%"REG_a
"), %%mm1 \n\t"
226 "movq (%0, %%"REG_a
"), %%mm0 \n\t"
227 "paddb (%2, %%"REG_a
"), %%mm1 \n\t"
228 "paddb (%3, %%"REG_a
"), %%mm1 \n\t"
229 "movq %%mm0, %%mm2 \n\t"
230 "movq %%mm1, %%mm3 \n\t"
// Interleaving a register with itself widens each byte b to the word b*0x0101.
231 "punpcklbw %%mm0, %%mm0 \n\t"
232 "punpckhbw %%mm2, %%mm2 \n\t"
233 "punpcklbw %%mm1, %%mm1 \n\t"
234 "punpckhbw %%mm3, %%mm3 \n\t"
// High word of the signed product, doubled, plus the widened pixel; the top
// byte of each word is then packed back with unsigned saturation.
235 "pmulhw %%mm0, %%mm1 \n\t"
236 "pmulhw %%mm2, %%mm3 \n\t"
237 "paddw %%mm1, %%mm1 \n\t"
238 "paddw %%mm3, %%mm3 \n\t"
239 "paddw %%mm0, %%mm1 \n\t"
240 "paddw %%mm2, %%mm3 \n\t"
241 "psrlw $8, %%mm1 \n\t"
242 "psrlw $8, %%mm3 \n\t"
243 "packuswb %%mm3, %%mm1 \n\t"
244 "movq %%mm1, (%4, %%"REG_a
") \n\t"
245 "add $8, %%"REG_a
" \n\t"
247 :: "r" (src
+mmx_len
), "r" (shift
[0]+mmx_len
), "r" (shift
[1]+mmx_len
), "r" (shift
[2]+mmx_len
),
248 "r" (dst
+mmx_len
), "g" (-mmx_len
)
// Tail: advance the three noise pointers past the part already processed and
// finish with the C kernel.
253 int8_t *shift2
[3]={shift
[0]+mmx_len
, shift
[1]+mmx_len
, shift
[2]+mmx_len
};
254 lineNoiseAvg_C(dst
+mmx_len
, src
+mmx_len
, len
-mmx_len
, shift2
);
/* Portable lineNoiseAvg kernel: for each pixel, n is the sum of the three
 * noise samples shift[0][i] + shift[1][i] + shift[2][i], and the output is
 * pixel + ((n * pixel) >> 7), stored back into the uint8_t destination.
 * NOTE(review): the loop header is not visible in this listing. */
259 static inline void lineNoiseAvg_C(uint8_t *dst
, uint8_t *src
, int len
, int8_t **shift
){
// The source bytes are read as signed values here.
261 int8_t *src2
= (int8_t*)src
;
265 const int n
= shift
[0][i
] + shift
[1][i
] + shift
[2][i
];
// NOTE(review): n*src2[i] can be negative; right-shifting a negative int is
// implementation-defined in C.
266 dst
[i
]= src2
[i
]+((n
*src2
[i
])>>7);
270 /***************************************************************************/
/* Apply noise to one image plane of width x height pixels, reading from src
 * (srcStride) and writing to dst (dstStride), using the table and settings
 * in fp.
 * NOTE(review): several conditions, braces and pointer increments of this
 * function are not visible in this listing; the notes below mark them. */
272 static void noise(uint8_t *dst
, uint8_t *src
, int dstStride
, int srcStride
, int width
, int height
, FilterParam
*fp
){
273 int8_t *noise
= fp
->noise
;
// Plain copy path: one bulk copy when both strides match, otherwise row by row.
// NOTE(review): the condition guarding this path is not visible — presumably
// taken when there is no noise table; confirm against the full file.
281 if(dstStride
==srcStride
) fast_memcpy(dst
, src
, srcStride
*height
);
284 for(y
=0; y
<height
; y
++)
286 fast_memcpy(dst
, src
, width
);
// Noise path, one row at a time.
// NOTE(review): nonTempRandShift[y] and fp->prev_shift[y] have MAX_RES rows;
// no check of height against MAX_RES is visible here.
294 for(y
=0; y
<height
; y
++)
// Temporal mode picks a fresh random offset per row; otherwise the fixed
// per-row table is used.
296 if(fp
->temporal
) shift
= rand()&(MAX_SHIFT
-1);
297 else shift
= nonTempRandShift
[y
];
// With quality == 0 the offset is rounded down to a multiple of 8.
299 if(fp
->quality
==0) shift
&= ~7;
// Averaged kernel: uses the row's three stored noise pointers, then records
// the new offset in slot [shiftptr].
// NOTE(review): the condition choosing between lineNoiseAvg and lineNoise is
// not visible — presumably fp->averaged; confirm.
301 lineNoiseAvg(dst
, src
, width
, fp
->prev_shift
[y
]);
302 fp
->prev_shift
[y
][fp
->shiftptr
] = noise
+ shift
;
304 lineNoise(dst
, src
, noise
, width
, shift
);
// shiftptr wraps back to 0 when it reaches 3 (its increment is not visible here).
310 if (fp
->shiftptr
== 3) fp
->shiftptr
= 0;
/* Filter config callback: the visible code forwards all arguments unchanged
 * to the next filter via vf_next_config() and returns its result. */
313 static int config(struct vf_instance_s
* vf
,
314 int width
, int height
, int d_width
, int d_height
,
315 unsigned int flags
, unsigned int outfmt
){
317 return vf_next_config(vf
,width
,height
,d_width
,d_height
,flags
,outfmt
);
/* get_image callback: when allowed, obtains a buffer from the next filter and
 * points mpi's planes and strides at it, then marks mpi with
 * MP_IMGFLAG_DIRECT. Does nothing if mpi has MP_IMGFLAG_PRESERVE set or if
 * its format differs from vf->priv->outfmt. */
320 static void get_image(struct vf_instance_s
* vf
, mp_image_t
*mpi
){
321 if(mpi
->flags
&MP_IMGFLAG_PRESERVE
) return; // don't change
322 if(mpi
->imgfmt
!=vf
->priv
->outfmt
) return; // colorspace differ
323 // ok, we can do pp in-place (or pp disabled):
324 vf
->dmpi
=vf_get_image(vf
->next
,mpi
->imgfmt
,
325 mpi
->type
, mpi
->flags
, mpi
->w
, mpi
->h
);
// Alias plane 0 (and the row stride and width) to the downstream buffer.
326 mpi
->planes
[0]=vf
->dmpi
->planes
[0];
327 mpi
->stride
[0]=vf
->dmpi
->stride
[0];
328 mpi
->width
=vf
->dmpi
->width
;
// Planar formats also alias planes 1 and 2.
329 if(mpi
->flags
&MP_IMGFLAG_PLANAR
){
330 mpi
->planes
[1]=vf
->dmpi
->planes
[1];
331 mpi
->planes
[2]=vf
->dmpi
->planes
[2];
332 mpi
->stride
[1]=vf
->dmpi
->stride
[1];
333 mpi
->stride
[2]=vf
->dmpi
->stride
[2];
335 mpi
->flags
|=MP_IMGFLAG_DIRECT
;
/* put_image callback: runs noise() over planes 0, 1 and 2 of mpi into dmpi,
 * copies the image attributes, issues emms / sfence when the CPU flags say
 * MMX / MMX2 are present, and passes dmpi and pts to the next filter.
 * NOTE(review): the declaration and assignment of the local `dmpi`, and the
 * remaining vf_get_image() arguments, are not visible in this listing. */
338 static int put_image(struct vf_instance_s
* vf
, mp_image_t
*mpi
, double pts
){
341 if(!(mpi
->flags
&MP_IMGFLAG_DIRECT
)){
342 // no DR, so get a new image! hope we'll get DR buffer:
343 vf
->dmpi
=vf_get_image(vf
->next
,vf
->priv
->outfmt
,
344 MP_IMGTYPE_TEMP
, MP_IMGFLAG_ACCEPT_STRIDE
,
348 //else printf("dr\n");
// Plane 0 is processed at full size with the luma parameters.
351 noise(dmpi
->planes
[0], mpi
->planes
[0], dmpi
->stride
[0], mpi
->stride
[0], mpi
->w
, mpi
->h
, &vf
->priv
->lumaParam
);
// Planes 1 and 2 use the chroma parameters at w/2 x h/2.
// NOTE(review): the hard-coded /2 assumes chroma is subsampled by two in both
// directions and truncates for odd w or h — confirm for all accepted formats.
352 noise(dmpi
->planes
[1], mpi
->planes
[1], dmpi
->stride
[1], mpi
->stride
[1], mpi
->w
/2, mpi
->h
/2, &vf
->priv
->chromaParam
);
353 noise(dmpi
->planes
[2], mpi
->planes
[2], dmpi
->stride
[2], mpi
->stride
[2], mpi
->w
/2, mpi
->h
/2, &vf
->priv
->chromaParam
);
355 vf_clone_mpi_attributes(dmpi
, mpi
);
358 if(gCpuCaps
.hasMMX
) asm volatile ("emms\n\t");
361 if(gCpuCaps
.hasMMX2
) asm volatile ("sfence\n\t");
364 return vf_next_put_image(vf
,dmpi
, pts
);
/* uninit callback: frees the chroma and luma noise tables and clears the
 * pointers. Returns immediately when vf->priv is NULL.
 * NOTE(review): the if() guards before free() are redundant, since free(NULL)
 * is a no-op. Whether vf->priv itself is released is not visible in this
 * listing. */
367 static void uninit(struct vf_instance_s
* vf
){
368 if(!vf
->priv
) return;
370 if(vf
->priv
->chromaParam
.noise
) free(vf
->priv
->chromaParam
.noise
);
371 vf
->priv
->chromaParam
.noise
= NULL
;
373 if(vf
->priv
->lumaParam
.noise
) free(vf
->priv
->lumaParam
.noise
);
374 vf
->priv
->lumaParam
.noise
= NULL
;
380 //===========================================================================//
/* query_format callback: the visible return asks the next filter about
 * vf->priv->outfmt (not about fmt itself).
 * NOTE(review): any test on fmt that precedes this return is not visible in
 * this listing. */
382 static int query_format(struct vf_instance_s
* vf
, unsigned int fmt
){
388 return vf_next_query_format(vf
,vf
->priv
->outfmt
);
/* Parse one option group into fp. The group runs from args up to the first
 * ':' (or to the end of the string). The leading number becomes fp->strength;
 * the letters 'u', 't', 'h' and 'p' inside the group set uniform, temporal,
 * quality and pattern. The noise table is built when strength is non-zero.
 * NOTE(review): atoi() gives no error indication for malformed input. */
393 static void parse(FilterParam
*fp
, char* args
){
// max marks the end of this option group.
395 char *max
= strchr(args
, ':');
397 if(!max
) max
= args
+ strlen(args
);
399 fp
->strength
= atoi(args
);
// A flag letter counts only if it occurs before max.
400 pos
= strchr(args
, 'u');
401 if(pos
&& pos
<max
) fp
->uniform
=1;
402 pos
= strchr(args
, 't');
403 if(pos
&& pos
<max
) fp
->temporal
=1;
404 pos
= strchr(args
, 'h');
405 if(pos
&& pos
<max
) fp
->quality
=1;
406 pos
= strchr(args
, 'p');
407 if(pos
&& pos
<max
) fp
->pattern
=1;
// NOTE(review): what is done with the 'a' match is not visible in this
// listing — presumably it enables fp->averaged; confirm.
408 pos
= strchr(args
, 'a');
414 if(fp
->strength
) initNoise(fp
);
/* Candidate output formats handed to vf_match_csp() in open().
 * NOTE(review): the initializer entries are not visible in this listing. */
417 static unsigned int fmt_list
[]={
/* Filter constructor: installs the put_image / get_image / query_format
 * callbacks, allocates zeroed private state, parses the option string,
 * selects an output format and picks the line kernels.
 * Returns 0 when no output format matches; other return paths are not visible
 * in this listing. */
424 static int open(vf_instance_t
*vf
, char* args
){
426 vf
->put_image
=put_image
;
427 vf
->get_image
=get_image
;
428 vf
->query_format
=query_format
;
// NOTE(review): the malloc() result is passed to memset() with no visible
// NULL check.
430 vf
->priv
=malloc(sizeof(struct vf_priv_s
));
431 memset(vf
->priv
, 0, sizeof(struct vf_priv_s
));
// Text after the first ':' configures chroma; the start of args configures luma.
// NOTE(review): strchr(args, ...) requires args != NULL; any guard is not
// visible here.
434 char *arg2
= strchr(args
,':');
435 if(arg2
) parse(&vf
->priv
->chromaParam
, arg2
+1);
436 parse(&vf
->priv
->lumaParam
, args
);
// Choose an output format from fmt_list, with IMGFMT_YV12 passed as the preferred one.
440 vf
->priv
->outfmt
=vf_match_csp(&vf
->next
,fmt_list
,IMGFMT_YV12
);
441 if(!vf
->priv
->outfmt
)
444 return 0; // no csp match :(
// Repoint the kernels at the SIMD versions.
// NOTE(review): the guard around the two MMX assignments is not visible —
// presumably gCpuCaps.hasMMX; confirm.
450 lineNoise
= lineNoise_MMX
;
451 lineNoiseAvg
= lineNoiseAvg_MMX
;
455 if(gCpuCaps
.hasMMX2
) lineNoise
= lineNoise_MMX2
;
456 // if(gCpuCaps.hasMMX) lineNoiseAvg= lineNoiseAvg_MMX2;
/* Filter description record for this filter.
 * NOTE(review): only the author string of the initializer is visible in this
 * listing. */
462 const vf_info_t vf_info_noise
= {
465 "Michael Niedermayer",
471 //===========================================================================//