1 /*****************************************************************************
2 * grain.c: add film grain
3 *****************************************************************************
4 * Copyright (C) 2010 Laurent Aimar
7 * Authors: Laurent Aimar <fenrir _AT_ videolan _DOT_ org>
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU Lesser General Public License as published by
11 * the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 /*****************************************************************************
26 *****************************************************************************/
34 #include <vlc_common.h>
35 #include <vlc_plugin.h>
36 #include <vlc_filter.h>
38 #include <vlc_picture.h>
41 /*****************************************************************************
43 *****************************************************************************/
44 static int Open (vlc_object_t
*);
45 static void Close(vlc_object_t
*);
47 #define BANK_SIZE (64)
49 #define CFG_PREFIX "grain-"
51 #define VARIANCE_MIN (0.0)
52 #define VARIANCE_MAX (10.0)
53 #define VARIANCE_TEXT N_("Variance")
54 #define VARIANCE_LONGTEXT N_("Variance of the gaussian noise")
57 #define PERIOD_MAX BANK_SIZE
58 #define PERIOD_MIN_TEXT N_("Minimal period")
59 #define PERIOD_MIN_LONGTEXT N_("Minimal period of the noise grain in pixel")
60 #define PERIOD_MAX_TEXT N_("Maximal period")
61 #define PERIOD_MAX_LONGTEXT N_("Maximal period of the noise grain in pixel")
/* Module descriptor entries: a "video filter" capability in the video
 * filter sub-category, with three user options whose names all start with
 * CFG_PREFIX ("grain-"). */
64 set_description(N_("Grain video filter"))
65 set_shortname( N_("Grain"))
66 set_help(N_("Adds filtered gaussian noise"))
67 set_capability( "video filter", 0 )
68 set_category(CAT_VIDEO
)
69 set_subcategory(SUBCAT_VIDEO_VFILTER
)
/* grain-variance: float, default 2.0, range [VARIANCE_MIN, VARIANCE_MAX]. */
70 add_float_with_range(CFG_PREFIX
"variance", 2.0, VARIANCE_MIN
, VARIANCE_MAX
,
71 VARIANCE_TEXT
, VARIANCE_LONGTEXT
, false)
/* grain-period-min: integer, default 1, range [PERIOD_MIN, PERIOD_MAX].
 * NOTE(review): the definition of PERIOD_MIN is not among the visible
 * #defines - confirm it exists. */
72 add_integer_with_range(CFG_PREFIX
"period-min", 1, PERIOD_MIN
, PERIOD_MAX
,
73 PERIOD_MIN_TEXT
, PERIOD_MIN_LONGTEXT
, false)
/* grain-period-max: integer, default 3*PERIOD_MAX/4 (48 with a 64-entry
 * bank), same range as grain-period-min. */
74 add_integer_with_range(CFG_PREFIX
"period-max", 3*PERIOD_MAX
/4, PERIOD_MIN
, PERIOD_MAX
,
75 PERIOD_MAX_TEXT
, PERIOD_MAX_LONGTEXT
, false)
/* Open() and Close() are the module activation/deactivation entry points. */
76 set_callbacks(Open
, Close
)
79 /*****************************************************************************
81 *****************************************************************************/
83 #define BLEND_SIZE (8)
90 int16_t bank
[BANK_SIZE
* BANK_SIZE
];
91 int16_t bank_y
[BANK_SIZE
* BANK_SIZE
];
92 int16_t bank_uv
[BANK_SIZE
* BANK_SIZE
];
94 void (*blend
)(uint8_t *dst
, size_t dst_pitch
,
95 const uint8_t *src
, size_t src_pitch
,
96 const int16_t *noise
);
105 /* Simple and *really fast* RNG (xorshift[13,17,5])*/
106 #define URAND_SEED (2463534242)
107 static uint32_t urand(uint32_t *seed
)
/* Uniform random value between 0 and 1: the next urand() output divided by
 * the largest value a uint32_t can hold. Advances *seed through urand(). */
static double drand(uint32_t *seed)
{
    const double raw = urand(seed);

    return raw / (double)UINT32_MAX;
}
120 /* Gaussian random value with a mean of 0 and a variance of 1 */
/* Takes two output pointers (r1, r2) and the RNG state (seed).
 * NOTE(review): the local declarations, the loop around the sampling below
 * and the stores to *r1 / *r2 are on lines not visible here; the visible
 * arithmetic looks like the Marsaglia polar method - confirm. */
121 static void grand(double *r1
, double *r2
, uint32_t *seed
)
/* Two uniform samples mapped from [0, 1] to [-1, 1]. */
126 u1
= 2 * drand(seed
) - 1;
127 u2
= 2 * drand(seed
) - 1;
/* Squared distance of the point (u1, u2) from the origin. */
128 s
= u1
* u1
+ u2
* u2
;
/* Scale factor sqrt(-2 * ln(s) / s). It is only finite and real for
 * 0 < s <= 1; the check enforcing that is not visible here. */
131 s
= sqrt(-2 * log(s
) / s
);
/* Adds noise to a w x h block of pixels.
 *
 * dst / dst_pitch: output block and the distance in bytes between its rows.
 * src / src_pitch: input block and the distance in bytes between its rows.
 * noise:           signed noise samples; consecutive rows are BANK_SIZE
 *                  entries apart.
 * w, h:            block width and height (their declarations sit on a
 *                  parameter line that is not visible here).
 */
136 static void BlockBlend(uint8_t *dst
, size_t dst_pitch
,
137 const uint8_t *src
, size_t src_pitch
,
138 const int16_t *noise
,
141 for (int y
= 0; y
< h
; y
++) {
142 for (int x
= 0; x
< w
; x
++) {
/* Source pixel plus noise sample, passed through clip_uint8_vlc()
 * (defined outside this file; presumably clamps to 0..255) before
 * being stored as an 8-bit pixel. */
143 dst
[y
* dst_pitch
+ x
] =
144 clip_uint8_vlc(src
[y
* src_pitch
+ x
] + noise
[y
* BANK_SIZE
+x
]);
149 static void BlockBlendC(uint8_t *dst
, size_t dst_pitch
,
150 const uint8_t *src
, size_t src_pitch
,
151 const int16_t *noise
)
153 BlockBlend(dst
, dst_pitch
, src
, src_pitch
, noise
,
154 BLEND_SIZE
, BLEND_SIZE
);
157 #ifdef CAN_COMPILE_SSE2
158 #define STRING_EXPAND(x) #x
159 #define STRING(x) STRING_EXPAND(x)
161 static void BlockBlendSse2(uint8_t *dst
, size_t dst_pitch
,
162 const uint8_t *src
, size_t src_pitch
,
163 const int16_t *noise
)
166 /* TODO It is possible to do the math on 8 bits using
167 * paddusb X and then psubusb -X.
169 asm volatile ("pxor %%xmm0, %%xmm0\n" : :);
170 for (int i
= 0; i
< 8/2; i
++) {
172 "movq (%[src1]), %%xmm1\n"
173 "movq (%[src2]), %%xmm3\n"
174 "movdqu (%[noise]), %%xmm2\n"
175 "movdqu 2*"STRING(BANK_SIZE
)"(%[noise]), %%xmm4\n"
177 "punpcklbw %%xmm0, %%xmm1\n"
178 "punpcklbw %%xmm0, %%xmm3\n"
180 "paddsw %%xmm2, %%xmm1\n"
181 "paddsw %%xmm4, %%xmm3\n"
182 "packuswb %%xmm1, %%xmm1\n"
183 "packuswb %%xmm3, %%xmm3\n"
184 "movq %%xmm1, (%[dst1])\n"
185 "movq %%xmm3, (%[dst2])\n"
186 : : [dst1
]"r"(&dst
[(2*i
+0) * dst_pitch
]),
187 [dst2
]"r"(&dst
[(2*i
+1) * dst_pitch
]),
188 [src1
]"r"(&src
[(2*i
+0) * src_pitch
]),
189 [src2
]"r"(&src
[(2*i
+1) * src_pitch
]),
190 [noise
]"r"(&noise
[2*i
* BANK_SIZE
])
191 : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "memory");
194 # error "BLEND_SIZE unsupported"
/* Executes the x86 "emms" instruction; takes no input and returns nothing. */
static void Emms(void)
{
    __asm__ __volatile__ ("emms");
}
204 * Scale the given signed data (on 7 bits + 1 for sign) using scale on 8 bits.
/* Produces a scaled copy of a BANK_SIZE x BANK_SIZE noise bank.
 *
 * dst:   destination bank (the store itself is on a line not visible here).
 * src:   source bank, read row by row.
 * scale: integer multiplier applied to every sample before the shift.
 */
206 static void Scale(int16_t *dst
, int16_t *src
, int scale
)
208 const int N
= BANK_SIZE
;
/* The product is shifted right by 15 bits in total (7 + 8). */
209 const int shift
= 7 + 8;
211 for (int y
= 0; y
< N
; y
++) {
212 for (int x
= 0; x
< N
; x
++) {
213 const int v
= src
[y
* N
+ x
];
/* Two rounding forms follow; both add (1 << (shift-1)) - 1, i.e. just
 * under one half, before shifting. The second negates before and after
 * so that the shift is applied to -v. The condition choosing between
 * them is not visible here - presumably the sign of v. */
216 vq
= ( v
* scale
+ (1 << (shift
-1)) - 1) >> shift
;
218 vq
= -((-v
* scale
+ (1 << (shift
-1)) - 1) >> shift
);
/* Adds noise to one plane, tile by tile.
 *
 * filter: filter instance; only its p_sys (for the blend callback) is used
 *         in the visible code.
 * dst:    destination plane; its visible size drives the loops.
 * src:    source plane, read at the same coordinates as dst.
 * bank:   BANK_SIZE x BANK_SIZE noise bank to sample from.
 * seed:   RNG state, advanced twice per tile.
 */
224 static void PlaneFilter(filter_t
*filter
,
225 plane_t
*dst
, const plane_t
*src
,
226 int16_t *bank
, uint32_t *seed
)
228 filter_sys_t
*sys
= filter
->p_sys
;
/* Walk the visible area in steps of BLEND_SIZE in both directions. */
230 for (int y
= 0; y
< dst
->i_visible_lines
; y
+= BLEND_SIZE
) {
231 for (int x
= 0; x
< dst
->i_visible_pitch
; x
+= BLEND_SIZE
) {
/* Random top-left corner inside the bank; the modulus keeps a full
 * BLEND_SIZE x BLEND_SIZE window within the bank bounds. */
232 int bx
= urand(seed
) % (BANK_SIZE
- BLEND_SIZE
+ 1);
233 int by
= urand(seed
) % (BANK_SIZE
- BLEND_SIZE
+ 1);
234 const int16_t *noise
= &bank
[by
* BANK_SIZE
+ bx
];
/* Remaining visible width and height from this tile's origin. */
236 int w
= dst
->i_visible_pitch
- x
;
237 int h
= dst
->i_visible_lines
- y
;
239 const uint8_t *srcp
= &src
->p_pixels
[y
* src
->i_pitch
+ x
];
240 uint8_t *dstp
= &dst
->p_pixels
[y
* dst
->i_pitch
+ x
];
/* A tile that fits entirely goes through the selected fixed-size blend
 * callback. */
242 if (w
>= BLEND_SIZE
&& h
>= BLEND_SIZE
)
243 sys
->blend(dstp
, dst
->i_pitch
, srcp
, src
->i_pitch
, noise
);
/* Generic blend limited to the part of the tile inside the visible area.
 * Presumably the `else` branch for edge tiles; the `else` keyword is on a
 * line not visible here. */
245 BlockBlend(dstp
, dst
->i_pitch
, srcp
, src
->i_pitch
, noise
,
246 __MIN(w
, BLEND_SIZE
), __MIN(h
, BLEND_SIZE
));
/* Per-picture entry point (installed as pf_video_filter by Open()).
 *
 * filter: filter instance.
 * src:    input picture; released before returning.
 *
 * The return statements are on lines not visible here.
 */
253 static picture_t
*Filter(filter_t
*filter
, picture_t
*src
)
255 filter_sys_t
*sys
= filter
->p_sys
;
/* Output picture obtained from the filter. */
257 picture_t
*dst
= filter_NewPicture(filter
);
/* NOTE(review): src is used again further down, so this release presumably
 * belongs to an allocation-failure path whose surrounding lines are not
 * visible here - confirm. */
259 picture_Release(src
);
/* Read the variance under the configuration lock (Callback() writes it
 * under the same lock) and clamp it to the option range. */
263 vlc_mutex_lock(&sys
->cfg
.lock
);
264 const double variance
= VLC_CLIP(sys
->cfg
.variance
, VARIANCE_MIN
, VARIANCE_MAX
);
265 vlc_mutex_unlock(&sys
->cfg
.lock
);
/* Integer scale: 256 * sqrt(variance), truncated by the conversion to int. */
267 const int scale
= 256 * sqrt(variance
);
/* Rebuild the scaled banks only when the scale differs from the cached one.
 * The chroma bank uses half the scale.
 * NOTE(review): the calls pass sys->scale, not the local scale; presumably
 * sys->scale is updated on a line not visible here - confirm. */
268 if (scale
!= sys
->scale
) {
270 Scale(sys
->bank_y
, sys
->bank
, sys
->scale
);
271 Scale(sys
->bank_uv
, sys
->bank
, sys
->scale
/ 2);
274 for (int i
= 0; i
< dst
->i_planes
; i
++) {
275 const plane_t
*srcp
= &src
->p
[i
];
276 plane_t
*dstp
= &dst
->p
[i
];
/* Plane 0 always gets noise; the other planes only when is_uv_filtered
 * is set. Plane 0 samples from bank_y (the alternative operand of the
 * conditional is on a line not visible here). */
278 if (i
== 0 || sys
->is_uv_filtered
) {
279 int16_t *bank
= i
== 0 ? sys
->bank_y
:
281 PlaneFilter(filter
, dstp
, srcp
, bank
, &sys
->seed
);
/* Plain copy of the plane; presumably the branch for planes that are not
 * filtered. */
284 plane_CopyPixels(dstp
, srcp
);
/* Carry the picture properties over to the output, then drop the input. */
288 picture_CopyProperties(dst
, src
);
289 picture_Release(src
);
294 * Generate a filtered gaussian noise within [-127, 127] range.
/* Fills a BANK_SIZE x BANK_SIZE bank with band-limited noise.
 *
 * bank:         output samples.
 * h_min, h_max: horizontal coefficient index bounds used by the band
 *               selection below.
 * v_min, v_max: vertical coefficient index bounds, used the same way.
 *
 * Returns int; the return statements, the allocation check and the release
 * of the workspace are on lines not visible here.
 */
296 static int Generate(int16_t *bank
, int h_min
, int h_max
, int v_min
, int v_max
)
298 const int N
= BANK_SIZE
;
/* One zero-initialised allocation split into three N x N double matrices. */
299 double *workspace
= calloc(3 * N
* N
, sizeof(*workspace
));
303 double *gn
= &workspace
[0 * N
* N
];
304 double *cij
= &workspace
[1 * N
* N
];
305 double *tmp
= &workspace
[2 * N
* N
];
307 /* Create a gaussian noise matrix */
/* grand() yields two values per call, hence the even-N assertion and the
 * N/2 inner iterations. The seed is the fixed URAND_SEED. */
308 assert((N
% 2) == 0);
309 uint32_t seed
= URAND_SEED
;
310 for (int y
= 0; y
< N
; y
++) {
311 for (int x
= 0; x
< N
/2; x
++) {
312 grand(&gn
[y
* N
+ 2 * x
+ 0], &gn
[y
* N
+ 2 * x
+ 1], &seed
);
316 /* Clear the non-selected frequencies.
317 * Only the central band is kept */
/* A coefficient is selected for clearing when it lies in the corner below
 * both minimums, or beyond either maximum. The clearing statement itself is
 * on lines not visible here. */
319 for (int y
= 0; y
< N
; y
++) {
320 for (int x
= 0; x
< N
; x
++) {
321 if ((x
< h_min
&& y
< v_min
) || x
> h_max
|| y
> v_max
) {
/* Gain applied later: sqrt(N*N / (N*N - zero)). `zero` is presumably the
 * number of cleared coefficients (its update is not visible here).
 * NOTE(review): if zero reached N*N this would divide by zero. */
327 const double correction
= sqrt((double)N
* N
/ (N
* N
- zero
));
329 /* Filter the gaussian noise using an IDCT
330 * The algo is simple/stupid and does C * GN * Ct */
/* Transform matrix: row 0 is the constant sqrt(1/N); row i > 0 is
 * sqrt(2/N) * cos((2j + 1) * i * pi / (2N)). */
331 for (int i
= 0; i
< N
; i
++) {
332 for (int j
= 0; j
< N
; j
++) {
333 cij
[i
* N
+ j
] = i
== 0 ? sqrt(1.0f
/ N
) :
334 sqrt(2.0f
/ N
) * cos((2 * j
+ 1) * i
* M_PI
/ 2 / N
);
338 //mtime_t tmul_0 = mdate();
/* First product: element (i, j) is sum over k of gn[i][k] * cij[k][j].
 * The declaration of v and the store of the sum (presumably into tmp) are
 * on lines not visible here. */
339 for (int i
= 0; i
< N
; i
++) {
340 for (int j
= 0; j
< N
; j
++) {
342 for (int k
= 0; k
< N
; k
++)
343 v
+= gn
[i
* N
+ k
] * cij
[k
* N
+ j
];
/* Second product: element (i, j) is sum over k of cij[k][i] * tmp[k][j],
 * i.e. cij is read transposed here. */
347 for (int i
= 0; i
< N
; i
++) {
348 for (int j
= 0; j
< N
; j
++) {
350 for (int k
= 0; k
< N
; k
++)
351 v
+= cij
[k
* N
+ i
] * tmp
[k
* N
+ j
];
352 /* Do not bias when rounding */
/* Two forms of round-half-up on v * correction * 127; the second negates
 * before and after. The condition selecting between them is not visible
 * here - presumably the sign of v. */
355 vq
= (int)( v
* correction
* 127 + 0.5);
357 vq
= -(int)(-v
* correction
* 127 + 0.5);
/* The stored value is limited to the int16_t range only. */
358 bank
[i
* N
+ j
] = VLC_CLIP(vq
, INT16_MIN
, INT16_MAX
);
361 //mtime_t mul_duration = mdate() - tmul_0;
362 //fprintf(stderr, "IDCT took %d ms\n", (int)(mul_duration / 1000));
/* Variable callback registered by Open() on CFG_PREFIX "variance".
 *
 * object: the filter, cast back to filter_t.
 * newval: new value; its f_float member becomes the stored variance.
 * cmd, oldval, data: unused.
 *
 * The return statement is on a line not visible here.
 */
368 static int Callback(vlc_object_t
*object
, char const *cmd
,
369 vlc_value_t oldval
, vlc_value_t newval
, void *data
)
371 filter_t
*filter
= (filter_t
*)object
;
372 filter_sys_t
*sys
= filter
->p_sys
;
373 VLC_UNUSED(cmd
); VLC_UNUSED(oldval
); VLC_UNUSED(data
);
/* Store the new variance under the configuration lock; Filter() reads it
 * under the same lock. */
375 vlc_mutex_lock(&sys
->cfg
.lock
);
376 sys
->cfg
.variance
= newval
.f_float
;
377 vlc_mutex_unlock(&sys
->cfg
.lock
);
/* Module activation: validates the input chroma, allocates and initialises
 * the filter state, builds the noise bank and installs Filter().
 *
 * object: the filter being opened, cast to filter_t.
 *
 * The return statements, the allocation check and the assignment of
 * filter->p_sys are on lines not visible here.
 */
382 static int Open(vlc_object_t
*object
)
384 filter_t
*filter
= (filter_t
*)object
;
/* Reject chromas with no description, fewer than three planes, or a
 * pixel_size other than 1. */
386 const vlc_chroma_description_t
*chroma
=
387 vlc_fourcc_GetChromaDescription(filter
->fmt_in
.video
.i_chroma
);
388 if (!chroma
|| chroma
->plane_count
< 3 || chroma
->pixel_size
!= 1) {
389 msg_Err(filter
, "Unsupported chroma (%4.4s)",
390 (char*)&(filter
->fmt_in
.video
.i_chroma
));
/* Uninitialised allocation: each field has to be set explicitly. */
394 filter_sys_t
*sys
= malloc(sizeof(*sys
));
397 sys
->is_uv_filtered
= true;
399 sys
->seed
= URAND_SEED
;
/* Convert the period options to coefficient bounds (BANK_SIZE - period):
 * the larger period gives the lower bound. Each bound is then clamped to
 * [1, BANK_SIZE - 1] independently.
 * NOTE(review): nothing visible here ensures cutoff_low <= cutoff_high when
 * period-min is set above period-max - confirm how Generate() copes. */
401 int cutoff_low
= BANK_SIZE
- var_InheritInteger(filter
, CFG_PREFIX
"period-max");
402 int cutoff_high
= BANK_SIZE
- var_InheritInteger(filter
, CFG_PREFIX
"period-min");
403 cutoff_low
= VLC_CLIP(cutoff_low
, 1, BANK_SIZE
- 1);
404 cutoff_high
= VLC_CLIP(cutoff_high
, 1, BANK_SIZE
- 1);
/* The same bounds are used horizontally and vertically; a non-zero result
 * enters the failure branch. */
405 if (Generate(sys
->bank
, cutoff_low
, cutoff_high
, cutoff_low
, cutoff_high
)) {
/* Default to the C blend; switch to the SSE2 one when it was compiled in
 * and vlc_CPU_SSE2() reports support. */
410 sys
->blend
= BlockBlendC
;
412 #if defined(CAN_COMPILE_SSE2) && 1
413 if (vlc_CPU_SSE2()) {
414 sys
->blend
= BlockBlendSse2
;
/* Runtime-adjustable variance: create the variable, read its initial
 * value, and have Callback() track later changes. */
419 vlc_mutex_init(&sys
->cfg
.lock
);
420 sys
->cfg
.variance
= var_CreateGetFloatCommand(filter
, CFG_PREFIX
"variance");
421 var_AddCallback(filter
, CFG_PREFIX
"variance", Callback
, NULL
);
424 filter
->pf_video_filter
= Filter
;
428 static void Close(vlc_object_t
*object
)
430 filter_t
*filter
= (filter_t
*)object
;
431 filter_sys_t
*sys
= filter
->p_sys
;
433 var_DelCallback(filter
, CFG_PREFIX
"variance", Callback
, NULL
);
434 vlc_mutex_destroy(&sys
->cfg
.lock
);