/*****************************************************************************
 * grain.c: add film grain
 *****************************************************************************
 * Copyright (C) 2010 Laurent Aimar
 *
 * Authors: Laurent Aimar <fenrir _AT_ videolan _DOT_ org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
 *****************************************************************************/
/*****************************************************************************
 * Preamble
 *****************************************************************************/
#include <assert.h>
#include <math.h>
#include <stdlib.h>

#include <vlc_common.h>
#include <vlc_plugin.h>
#include <vlc_filter.h>
#include <vlc_picture.h>
/*****************************************************************************
 * Module descriptor
 *****************************************************************************/
44 static int Open (vlc_object_t
*);
45 static void Close(vlc_object_t
*);
47 #define BANK_SIZE (64)
49 #define CFG_PREFIX "grain-"
51 #define VARIANCE_MIN (0.0)
52 #define VARIANCE_MAX (10.0)
53 #define VARIANCE_TEXT N_("Variance")
54 #define VARIANCE_LONGTEXT N_("Variance of the gaussian noise")
57 #define PERIOD_MAX BANK_SIZE
58 #define PERIOD_MIN_TEXT N_("Minimal period")
59 #define PERIOD_MIN_LONGTEXT N_("Minimal period of the noise grain in pixel")
60 #define PERIOD_MAX_TEXT N_("Maximal period")
61 #define PERIOD_MAX_LONGTEXT N_("Maximal period of the noise grain in pixel")
64 set_description(N_("Grain video filter"))
65 set_shortname( N_("Grain"))
66 set_help(N_("Adds filtered gaussian noise"))
67 set_capability( "video filter", 0 )
68 set_category(CAT_VIDEO
)
69 set_subcategory(SUBCAT_VIDEO_VFILTER
)
70 add_float_with_range(CFG_PREFIX
"variance", 2.0, VARIANCE_MIN
, VARIANCE_MAX
,
71 VARIANCE_TEXT
, VARIANCE_LONGTEXT
, false)
72 add_integer_with_range(CFG_PREFIX
"period-min", 1, PERIOD_MIN
, PERIOD_MAX
,
73 PERIOD_MIN_TEXT
, PERIOD_MIN_LONGTEXT
, false)
74 add_integer_with_range(CFG_PREFIX
"period-max", 3*PERIOD_MAX
/4, PERIOD_MIN
, PERIOD_MAX
,
75 PERIOD_MAX_TEXT
, PERIOD_MAX_LONGTEXT
, false)
76 set_callbacks(Open
, Close
)
/*****************************************************************************
 * Local definitions
 *****************************************************************************/
83 #define BLEND_SIZE (8)
89 int16_t bank
[BANK_SIZE
* BANK_SIZE
];
90 int16_t bank_y
[BANK_SIZE
* BANK_SIZE
];
91 int16_t bank_uv
[BANK_SIZE
* BANK_SIZE
];
93 void (*blend
)(uint8_t *dst
, size_t dst_pitch
,
94 const uint8_t *src
, size_t src_pitch
,
95 const int16_t *noise
);
/* Simple and *really fast* RNG (xorshift[13,17,5]) */
#define URAND_SEED (2463534242)

/**
 * Advances a 32-bit xorshift generator by one step.
 *
 * NOTE(review): the body was missing from this listing; it is rebuilt from
 * the shift triple (13, 17, 5) named in the comment above.
 *
 * @param seed generator state, updated in place; must be non-zero, since
 *             zero is a fixed point of the xorshift recurrence
 * @return the new state, which is also the generated value
 */
static uint32_t urand(uint32_t *seed)
{
    uint32_t s = *seed;
    s ^= s << 13;
    s ^= s >> 17;
    s ^= s << 5;
    return *seed = s;
}
/**
 * Uniform random value between 0 and 1.
 *
 * @param seed generator state forwarded to urand(), updated in place
 * @return the next urand() value divided by UINT32_MAX
 */
static double drand(uint32_t *seed)
{
    return urand(seed) / (double)UINT32_MAX;
}
/**
 * Gaussian random values with a mean of 0 and a variance of 1.
 *
 * Draws a point (u1, u2) in the square [-1, 1] x [-1, 1], rejects it until it
 * falls inside the unit circle, then rescales both coordinates by
 * sqrt(-2 * log(s) / s) where s = u1^2 + u2^2.
 *
 * NOTE(review): the local declarations, the rejection loop and the two
 * output stores were missing from this listing; they are restored to match
 * the visible arithmetic and the r1/r2 output parameters.
 *
 * @param r1   receives the first gaussian sample
 * @param r2   receives the second gaussian sample
 * @param seed generator state forwarded to drand(), updated in place
 */
static void grand(double *r1, double *r2, uint32_t *seed)
{
    double s;
    double u1, u2;

    do {
        u1 = 2 * drand(seed) - 1;
        u2 = 2 * drand(seed) - 1;
        s = u1 * u1 + u2 * u2;
        /* s == 0 would make log(s) / s undefined, so redraw in that case too */
    } while (s >= 1.0 || s == 0.0);

    s = sqrt(-2 * log(s) / s);
    *r1 = u1 * s;
    *r2 = u2 * s;
}
135 static void BlockBlend(uint8_t *dst
, size_t dst_pitch
,
136 const uint8_t *src
, size_t src_pitch
,
137 const int16_t *noise
,
140 for (int y
= 0; y
< h
; y
++) {
141 for (int x
= 0; x
< w
; x
++) {
142 dst
[y
* dst_pitch
+ x
] =
143 clip_uint8_vlc(src
[y
* src_pitch
+ x
] + noise
[y
* BANK_SIZE
+x
]);
148 static void BlockBlendC(uint8_t *dst
, size_t dst_pitch
,
149 const uint8_t *src
, size_t src_pitch
,
150 const int16_t *noise
)
152 BlockBlend(dst
, dst_pitch
, src
, src_pitch
, noise
,
153 BLEND_SIZE
, BLEND_SIZE
);
#ifdef CAN_COMPILE_SSE2
/* Two-step stringification so that BANK_SIZE is expanded before quoting. */
#define STRING_EXPAND(x) #x
#define STRING(x) STRING_EXPAND(x)

/**
 * SSE2 blend of one full 8x8 tile; same contract as BlockBlendC().
 *
 * Each loop iteration handles two rows: 8 source bytes per row are widened
 * to 16-bit words, added to 8 noise words with signed saturation, packed
 * back to bytes with unsigned saturation and stored.
 *
 * NOTE(review): the "#if BLEND_SIZE == 8" / "#else" / "#endif" lines, the
 * "asm volatile (" opener and the braces were missing from this listing and
 * are restored here. A line is also missing just before the function header
 * -- if the project tags SSE routines with a target attribute, put it back.
 */
static void BlockBlendSse2(uint8_t *dst, size_t dst_pitch,
                           const uint8_t *src, size_t src_pitch,
                           const int16_t *noise)
{
#if BLEND_SIZE == 8
    /* TODO It is possible to do the math on 8 bits using
     * paddusb X and then psubusb -X.
     */
    /* xmm0 = 0, used as the high byte when widening pixels to words */
    asm volatile ("pxor %%xmm0, %%xmm0\n" : :);
    for (int i = 0; i < 8/2; i++) {
        asm volatile (
            /* load 8 pixels from each of the two rows */
            "movq (%[src1]), %%xmm1\n"
            "movq (%[src2]), %%xmm3\n"
            /* load 8 noise words per row; rows are 2*BANK_SIZE bytes apart */
            "movdqu (%[noise]), %%xmm2\n"
            "movdqu 2*"STRING(BANK_SIZE)"(%[noise]), %%xmm4\n"

            /* widen the pixels from bytes to words */
            "punpcklbw %%xmm0, %%xmm1\n"
            "punpcklbw %%xmm0, %%xmm3\n"

            /* add with signed saturation, then clamp back to 0..255 */
            "paddsw %%xmm2, %%xmm1\n"
            "paddsw %%xmm4, %%xmm3\n"
            "packuswb %%xmm1, %%xmm1\n"
            "packuswb %%xmm3, %%xmm3\n"
            "movq %%xmm1, (%[dst1])\n"
            "movq %%xmm3, (%[dst2])\n"
            : : [dst1]"r"(&dst[(2*i+0) * dst_pitch]),
                [dst2]"r"(&dst[(2*i+1) * dst_pitch]),
                [src1]"r"(&src[(2*i+0) * src_pitch]),
                [src2]"r"(&src[(2*i+1) * src_pitch]),
                [noise]"r"(&noise[2*i * BANK_SIZE])
            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "memory");
    }
#else
#   error "BLEND_SIZE unsupported"
#endif
}

/** Executes the emms instruction. */
static void Emms(void)
{
    asm volatile ("emms");
}
#endif
203 * Scale the given signed data (on 7 bits + 1 for sign) using scale on 8 bits.
205 static void Scale(int16_t *dst
, int16_t *src
, int scale
)
207 const int N
= BANK_SIZE
;
208 const int shift
= 7 + 8;
210 for (int y
= 0; y
< N
; y
++) {
211 for (int x
= 0; x
< N
; x
++) {
212 const int v
= src
[y
* N
+ x
];
215 vq
= ( v
* scale
+ (1 << (shift
-1)) - 1) >> shift
;
217 vq
= -((-v
* scale
+ (1 << (shift
-1)) - 1) >> shift
);
223 static void PlaneFilter(filter_t
*filter
,
224 plane_t
*dst
, const plane_t
*src
,
225 int16_t *bank
, uint32_t *seed
)
227 filter_sys_t
*sys
= filter
->p_sys
;
229 for (int y
= 0; y
< dst
->i_visible_lines
; y
+= BLEND_SIZE
) {
230 for (int x
= 0; x
< dst
->i_visible_pitch
; x
+= BLEND_SIZE
) {
231 int bx
= urand(seed
) % (BANK_SIZE
- BLEND_SIZE
+ 1);
232 int by
= urand(seed
) % (BANK_SIZE
- BLEND_SIZE
+ 1);
233 const int16_t *noise
= &bank
[by
* BANK_SIZE
+ bx
];
235 int w
= dst
->i_visible_pitch
- x
;
236 int h
= dst
->i_visible_lines
- y
;
238 const uint8_t *srcp
= &src
->p_pixels
[y
* src
->i_pitch
+ x
];
239 uint8_t *dstp
= &dst
->p_pixels
[y
* dst
->i_pitch
+ x
];
241 if (w
>= BLEND_SIZE
&& h
>= BLEND_SIZE
)
242 sys
->blend(dstp
, dst
->i_pitch
, srcp
, src
->i_pitch
, noise
);
244 BlockBlend(dstp
, dst
->i_pitch
, srcp
, src
->i_pitch
, noise
,
245 __MIN(w
, BLEND_SIZE
), __MIN(h
, BLEND_SIZE
));
252 static picture_t
*Filter(filter_t
*filter
, picture_t
*src
)
254 filter_sys_t
*sys
= filter
->p_sys
;
256 picture_t
*dst
= filter_NewPicture(filter
);
258 picture_Release(src
);
262 vlc_mutex_lock(&sys
->cfg
.lock
);
263 const double variance
= VLC_CLIP(sys
->cfg
.variance
, VARIANCE_MIN
, VARIANCE_MAX
);
264 vlc_mutex_unlock(&sys
->cfg
.lock
);
266 const int scale
= 256 * sqrt(variance
);
267 if (scale
!= sys
->scale
) {
269 Scale(sys
->bank_y
, sys
->bank
, sys
->scale
);
270 Scale(sys
->bank_uv
, sys
->bank
, sys
->scale
/ 2);
273 for (int i
= 0; i
< dst
->i_planes
; i
++) {
274 const plane_t
*srcp
= &src
->p
[i
];
275 plane_t
*dstp
= &dst
->p
[i
];
277 if (i
== 0 || sys
->is_uv_filtered
) {
278 int16_t *bank
= i
== 0 ? sys
->bank_y
:
280 PlaneFilter(filter
, dstp
, srcp
, bank
, &sys
->seed
);
283 plane_CopyPixels(dstp
, srcp
);
287 picture_CopyProperties(dst
, src
);
288 picture_Release(src
);
293 * Generate a filteried gaussian noise within [-127, 127] range.
295 static int Generate(int16_t *bank
, int h_min
, int h_max
, int v_min
, int v_max
)
297 const int N
= BANK_SIZE
;
298 double *workspace
= calloc(3 * N
* N
, sizeof(*workspace
));
302 double *gn
= &workspace
[0 * N
* N
];
303 double *cij
= &workspace
[1 * N
* N
];
304 double *tmp
= &workspace
[2 * N
* N
];
306 /* Create a gaussian noise matrix */
307 assert((N
% 2) == 0);
308 uint32_t seed
= URAND_SEED
;
309 for (int y
= 0; y
< N
; y
++) {
310 for (int x
= 0; x
< N
/2; x
++) {
311 grand(&gn
[y
* N
+ 2 * x
+ 0], &gn
[y
* N
+ 2 * x
+ 1], &seed
);
315 /* Clear non selected frequency.
316 * Only the central band is kept */
318 for (int y
= 0; y
< N
; y
++) {
319 for (int x
= 0; x
< N
; x
++) {
320 if ((x
< h_min
&& y
< v_min
) || x
> h_max
|| y
> v_max
) {
326 const double correction
= sqrt((double)N
* N
/ (N
* N
- zero
));
328 /* Filter the gaussian noise using an IDCT
329 * The algo is simple/stupid and does C * GN * Ct */
330 for (int i
= 0; i
< N
; i
++) {
331 for (int j
= 0; j
< N
; j
++) {
332 cij
[i
* N
+ j
] = i
== 0 ? sqrt(1.0f
/ N
) :
333 sqrt(2.0f
/ N
) * cos((2 * j
+ 1) * i
* M_PI
/ 2 / N
);
337 //mtime_t tmul_0 = mdate();
338 for (int i
= 0; i
< N
; i
++) {
339 for (int j
= 0; j
< N
; j
++) {
341 for (int k
= 0; k
< N
; k
++)
342 v
+= gn
[i
* N
+ k
] * cij
[k
* N
+ j
];
346 for (int i
= 0; i
< N
; i
++) {
347 for (int j
= 0; j
< N
; j
++) {
349 for (int k
= 0; k
< N
; k
++)
350 v
+= cij
[k
* N
+ i
] * tmp
[k
* N
+ j
];
351 /* Do not bias when rounding */
354 vq
= (int)( v
* correction
* 127 + 0.5);
356 vq
= -(int)(-v
* correction
* 127 + 0.5);
357 bank
[i
* N
+ j
] = VLC_CLIP(vq
, INT16_MIN
, INT16_MAX
);
360 //mtime_t mul_duration = mdate() - tmul_0;
361 //fprintf(stderr, "IDCT took %d ms\n", (int)(mul_duration / 1000));
367 static int Callback(vlc_object_t
*object
, char const *cmd
,
368 vlc_value_t oldval
, vlc_value_t newval
, void *data
)
370 filter_t
*filter
= (filter_t
*)object
;
371 filter_sys_t
*sys
= filter
->p_sys
;
372 VLC_UNUSED(cmd
); VLC_UNUSED(oldval
); VLC_UNUSED(data
);
374 vlc_mutex_lock(&sys
->cfg
.lock
);
375 sys
->cfg
.variance
= newval
.f_float
;
376 vlc_mutex_unlock(&sys
->cfg
.lock
);
381 static int Open(vlc_object_t
*object
)
383 filter_t
*filter
= (filter_t
*)object
;
385 const vlc_chroma_description_t
*chroma
=
386 vlc_fourcc_GetChromaDescription(filter
->fmt_in
.video
.i_chroma
);
387 if (!chroma
|| chroma
->plane_count
< 3 || chroma
->pixel_size
!= 1) {
388 msg_Err(filter
, "Unsupported chroma (%4.4s)",
389 (char*)&(filter
->fmt_in
.video
.i_chroma
));
393 filter_sys_t
*sys
= malloc(sizeof(*sys
));
396 sys
->is_uv_filtered
= true;
398 sys
->seed
= URAND_SEED
;
400 int cutoff_low
= BANK_SIZE
- var_InheritInteger(filter
, CFG_PREFIX
"period-max");
401 int cutoff_high
= BANK_SIZE
- var_InheritInteger(filter
, CFG_PREFIX
"period-min");
402 cutoff_low
= VLC_CLIP(cutoff_low
, 1, BANK_SIZE
- 1);
403 cutoff_high
= VLC_CLIP(cutoff_high
, 1, BANK_SIZE
- 1);
404 if (Generate(sys
->bank
, cutoff_low
, cutoff_high
, cutoff_low
, cutoff_high
)) {
409 sys
->blend
= BlockBlendC
;
411 #if defined(CAN_COMPILE_SSE2) && 1
412 if (vlc_CPU_SSE2()) {
413 sys
->blend
= BlockBlendSse2
;
418 vlc_mutex_init(&sys
->cfg
.lock
);
419 sys
->cfg
.variance
= var_CreateGetFloatCommand(filter
, CFG_PREFIX
"variance");
420 var_AddCallback(filter
, CFG_PREFIX
"variance", Callback
, NULL
);
423 filter
->pf_video_filter
= Filter
;
427 static void Close(vlc_object_t
*object
)
429 filter_t
*filter
= (filter_t
*)object
;
430 filter_sys_t
*sys
= filter
->p_sys
;
432 var_DelCallback(filter
, CFG_PREFIX
"variance", Callback
, NULL
);
433 vlc_mutex_destroy(&sys
->cfg
.lock
);