demux: mp4: avoid audio cuts on seek
[vlc.git] / modules / video_filter / grain.c
blobfc8c7fbdcb3baccc43a8c51f1380b41eb124f2f8
1 /*****************************************************************************
2 * grain.c: add film grain
3 *****************************************************************************
4 * Copyright (C) 2010 Laurent Aimar
5 * $Id$
7 * Authors: Laurent Aimar <fenrir _AT_ videolan _DOT_ org>
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU Lesser General Public License as published by
11 * the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 /*****************************************************************************
25 * Preamble
26 *****************************************************************************/
28 #ifdef HAVE_CONFIG_H
29 # include "config.h"
30 #endif
31 #include <assert.h>
32 #include <math.h>
34 #include <vlc_common.h>
35 #include <vlc_plugin.h>
36 #include <vlc_filter.h>
37 #include <vlc_cpu.h>
38 #include <vlc_picture.h>
39 #include <vlc_rand.h>
41 /*****************************************************************************
42 * Module descriptor
43 *****************************************************************************/
44 static int Open (vlc_object_t *);
45 static void Close(vlc_object_t *);
47 #define BANK_SIZE (64)
49 #define CFG_PREFIX "grain-"
51 #define VARIANCE_MIN (0.0)
52 #define VARIANCE_MAX (10.0)
53 #define VARIANCE_TEXT N_("Variance")
54 #define VARIANCE_LONGTEXT N_("Variance of the gaussian noise")
56 #define PERIOD_MIN 1
57 #define PERIOD_MAX BANK_SIZE
58 #define PERIOD_MIN_TEXT N_("Minimal period")
59 #define PERIOD_MIN_LONGTEXT N_("Minimal period of the noise grain in pixel")
60 #define PERIOD_MAX_TEXT N_("Maximal period")
61 #define PERIOD_MAX_LONGTEXT N_("Maximal period of the noise grain in pixel")
63 vlc_module_begin()
64 set_description(N_("Grain video filter"))
65 set_shortname( N_("Grain"))
66 set_help(N_("Adds filtered gaussian noise"))
67 set_capability( "video filter", 0 )
68 set_category(CAT_VIDEO)
69 set_subcategory(SUBCAT_VIDEO_VFILTER)
70 add_float_with_range(CFG_PREFIX "variance", 2.0, VARIANCE_MIN, VARIANCE_MAX,
71 VARIANCE_TEXT, VARIANCE_LONGTEXT, false)
72 add_integer_with_range(CFG_PREFIX "period-min", 1, PERIOD_MIN, PERIOD_MAX,
73 PERIOD_MIN_TEXT, PERIOD_MIN_LONGTEXT, false)
74 add_integer_with_range(CFG_PREFIX "period-max", 3*PERIOD_MAX/4, PERIOD_MIN, PERIOD_MAX,
75 PERIOD_MAX_TEXT, PERIOD_MAX_LONGTEXT, false)
76 set_callbacks(Open, Close)
77 vlc_module_end()
79 /*****************************************************************************
80 * Local prototypes
81 *****************************************************************************/
83 #define BLEND_SIZE (8)
84 struct filter_sys_t {
85 bool is_uv_filtered;
86 uint32_t seed;
88 int scale;
89 int16_t bank[BANK_SIZE * BANK_SIZE];
90 int16_t bank_y[BANK_SIZE * BANK_SIZE];
91 int16_t bank_uv[BANK_SIZE * BANK_SIZE];
93 void (*blend)(uint8_t *dst, size_t dst_pitch,
94 const uint8_t *src, size_t src_pitch,
95 const int16_t *noise);
96 void (*emms)(void);
98 struct {
99 vlc_mutex_t lock;
100 double variance;
101 } cfg;
/*
 * Simple and *really fast* RNG (xorshift[13,17,5]).
 *
 * Advances the 32-bit state stored in *seed by one step and returns the
 * new state.
 */
#define URAND_SEED (2463534242)
static uint32_t urand(uint32_t *seed)
{
    uint32_t state = *seed;

    state ^= state << 13;
    state ^= state >> 17;
    state ^= state << 5;

    *seed = state;
    return state;
}
/*
 * Uniform random value between 0 and 1: the next urand() output divided
 * by UINT32_MAX. Advances *seed by one step.
 */
static double drand(uint32_t *seed)
{
    const uint32_t sample = urand(seed);

    return sample / (double)UINT32_MAX;
}
/*
 * Gaussian random values with a mean of 0 and a variance of 1.
 *
 * Uses the polar (rejection) form of the Box-Muller transform: a point
 * (u1, u2) is drawn in the [-1, 1] square until it falls strictly inside
 * the unit circle and is not the origin, then both coordinates are rescaled.
 *
 * r1, r2: receive the two generated values.
 * seed:   RNG state, advanced by two drand() calls per attempt.
 */
static void grand(double *r1, double *r2, uint32_t *seed)
{
    double s;
    double u1, u2;
    do {
        u1 = 2 * drand(seed) - 1;
        u2 = 2 * drand(seed) - 1;
        s = u1 * u1 + u2 * u2;
        /* s == 0 must be rejected as well: log(0) and the division by s
         * below would otherwise produce non-finite results. */
    } while (s >= 1.0 || s == 0.0);

    s = sqrt(-2 * log(s) / s);
    *r1 = u1 * s;
    *r2 = u2 * s;
}
135 static void BlockBlend(uint8_t *dst, size_t dst_pitch,
136 const uint8_t *src, size_t src_pitch,
137 const int16_t *noise,
138 int w, int h)
140 for (int y = 0; y < h; y++) {
141 for (int x = 0; x < w; x++) {
142 dst[y * dst_pitch + x] =
143 clip_uint8_vlc(src[y * src_pitch + x] + noise[y * BANK_SIZE +x]);
148 static void BlockBlendC(uint8_t *dst, size_t dst_pitch,
149 const uint8_t *src, size_t src_pitch,
150 const int16_t *noise)
152 BlockBlend(dst, dst_pitch, src, src_pitch, noise,
153 BLEND_SIZE, BLEND_SIZE);
#ifdef CAN_COMPILE_SSE2
/* Turn the value of a macro into a string literal (used to embed
 * BANK_SIZE in the assembly text below). */
#define STRING_EXPAND(x) #x
#define STRING(x) STRING_EXPAND(x)

/*
 * SSE2 blend of one full 8x8 block; same contract as BlockBlendC().
 *
 * Two rows are handled per iteration: 8 source bytes per row are widened
 * to 16-bit words, added to the 16-bit noise with signed saturation, packed
 * back to bytes with unsigned saturation and stored.
 */
VLC_SSE
static void BlockBlendSse2(uint8_t *dst, size_t dst_pitch,
                           const uint8_t *src, size_t src_pitch,
                           const int16_t *noise)
{
#if BLEND_SIZE == 8
    /* TODO It is possible to do the math on 8 bits using
     * paddusb X and then psubusb -X.
     */

    for (int i = 0; i < 8/2; i++) {
        asm volatile (
            /* xmm0 is the zero register used for widening. It is cleared
             * in the same asm statement that consumes it, because the
             * compiler gives no guarantee that a register keeps its value
             * between two separate asm statements. */
            "pxor       %%xmm0,      %%xmm0\n"

            "movq       (%[src1]),   %%xmm1\n"
            "movq       (%[src2]),   %%xmm3\n"
            "movdqu     (%[noise]), %%xmm2\n"
            /* second noise row: BANK_SIZE 16-bit samples further */
            "movdqu 2*"STRING(BANK_SIZE)"(%[noise]), %%xmm4\n"

            "punpcklbw  %%xmm0,      %%xmm1\n"
            "punpcklbw  %%xmm0,      %%xmm3\n"

            "paddsw     %%xmm2,      %%xmm1\n"
            "paddsw     %%xmm4,      %%xmm3\n"
            "packuswb   %%xmm1,      %%xmm1\n"
            "packuswb   %%xmm3,      %%xmm3\n"
            "movq       %%xmm1,      (%[dst1])\n"
            "movq       %%xmm3,      (%[dst2])\n"
            : : [dst1]"r"(&dst[(2*i+0) * dst_pitch]),
                [dst2]"r"(&dst[(2*i+1) * dst_pitch]),
                [src1]"r"(&src[(2*i+0) * src_pitch]),
                [src2]"r"(&src[(2*i+1) * src_pitch]),
                [noise]"r"(&noise[2*i * BANK_SIZE])
            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "memory");
    }
#else
#   error "BLEND_SIZE unsupported"
#endif
}

/* Executes the x86 emms instruction; installed as filter_sys_t::emms
 * together with BlockBlendSse2(). */
static void Emms(void)
{
    asm volatile ("emms");
}
#endif
203 * Scale the given signed data (on 7 bits + 1 for sign) using scale on 8 bits.
205 static void Scale(int16_t *dst, int16_t *src, int scale)
207 const int N = BANK_SIZE;
208 const int shift = 7 + 8;
210 for (int y = 0; y < N; y++) {
211 for (int x = 0; x < N; x++) {
212 const int v = src[y * N + x];
213 int vq;
214 if (v >= 0)
215 vq = ( v * scale + (1 << (shift-1)) - 1) >> shift;
216 else
217 vq = -((-v * scale + (1 << (shift-1)) - 1) >> shift);
218 dst[y * N + x] = vq;
223 static void PlaneFilter(filter_t *filter,
224 plane_t *dst, const plane_t *src,
225 int16_t *bank, uint32_t *seed)
227 filter_sys_t *sys = filter->p_sys;
229 for (int y = 0; y < dst->i_visible_lines; y += BLEND_SIZE) {
230 for (int x = 0; x < dst->i_visible_pitch; x += BLEND_SIZE) {
231 int bx = urand(seed) % (BANK_SIZE - BLEND_SIZE + 1);
232 int by = urand(seed) % (BANK_SIZE - BLEND_SIZE + 1);
233 const int16_t *noise = &bank[by * BANK_SIZE + bx];
235 int w = dst->i_visible_pitch - x;
236 int h = dst->i_visible_lines - y;
238 const uint8_t *srcp = &src->p_pixels[y * src->i_pitch + x];
239 uint8_t *dstp = &dst->p_pixels[y * dst->i_pitch + x];
241 if (w >= BLEND_SIZE && h >= BLEND_SIZE)
242 sys->blend(dstp, dst->i_pitch, srcp, src->i_pitch, noise);
243 else
244 BlockBlend(dstp, dst->i_pitch, srcp, src->i_pitch, noise,
245 __MIN(w, BLEND_SIZE), __MIN(h, BLEND_SIZE));
248 if (sys->emms)
249 sys->emms();
252 static picture_t *Filter(filter_t *filter, picture_t *src)
254 filter_sys_t *sys = filter->p_sys;
256 picture_t *dst = filter_NewPicture(filter);
257 if (!dst) {
258 picture_Release(src);
259 return NULL;
262 vlc_mutex_lock(&sys->cfg.lock);
263 const double variance = VLC_CLIP(sys->cfg.variance, VARIANCE_MIN, VARIANCE_MAX);
264 vlc_mutex_unlock(&sys->cfg.lock);
266 const int scale = 256 * sqrt(variance);
267 if (scale != sys->scale) {
268 sys->scale = scale;
269 Scale(sys->bank_y, sys->bank, sys->scale);
270 Scale(sys->bank_uv, sys->bank, sys->scale / 2);
273 for (int i = 0; i < dst->i_planes; i++) {
274 const plane_t *srcp = &src->p[i];
275 plane_t *dstp = &dst->p[i];
277 if (i == 0 || sys->is_uv_filtered) {
278 int16_t *bank = i == 0 ? sys->bank_y :
279 sys->bank_uv;
280 PlaneFilter(filter, dstp, srcp, bank, &sys->seed);
282 else {
283 plane_CopyPixels(dstp, srcp);
287 picture_CopyProperties(dst, src);
288 picture_Release(src);
289 return dst;
293 * Generate a filteried gaussian noise within [-127, 127] range.
295 static int Generate(int16_t *bank, int h_min, int h_max, int v_min, int v_max)
297 const int N = BANK_SIZE;
298 double *workspace = calloc(3 * N * N, sizeof(*workspace));
299 if (!workspace)
300 return VLC_ENOMEM;
302 double *gn = &workspace[0 * N * N];
303 double *cij = &workspace[1 * N * N];
304 double *tmp = &workspace[2 * N * N];
306 /* Create a gaussian noise matrix */
307 assert((N % 2) == 0);
308 uint32_t seed = URAND_SEED;
309 for (int y = 0; y < N; y++) {
310 for (int x = 0; x < N/2; x++) {
311 grand(&gn[y * N + 2 * x + 0], &gn[y * N + 2 * x + 1], &seed);
315 /* Clear non selected frequency.
316 * Only the central band is kept */
317 int zero = 0;
318 for (int y = 0; y < N; y++) {
319 for (int x = 0; x < N; x++) {
320 if ((x < h_min && y < v_min) || x > h_max || y > v_max) {
321 gn[y * N + x] = 0.0;
322 zero++;
326 const double correction = sqrt((double)N * N / (N * N - zero));
328 /* Filter the gaussian noise using an IDCT
329 * The algo is simple/stupid and does C * GN * Ct */
330 for (int i = 0; i < N; i++) {
331 for (int j = 0; j < N; j++) {
332 cij[i * N + j] = i == 0 ? sqrt(1.0f / N) :
333 sqrt(2.0f / N) * cos((2 * j + 1) * i * M_PI / 2 / N);
337 //mtime_t tmul_0 = mdate();
338 for (int i = 0; i < N; i++) {
339 for (int j = 0; j < N; j++) {
340 double v = 0.0;
341 for (int k = 0; k < N; k++)
342 v += gn[i * N + k] * cij[k * N + j];
343 tmp[i * N + j] = v;
346 for (int i = 0; i < N; i++) {
347 for (int j = 0; j < N; j++) {
348 double v = 0.0;
349 for (int k = 0; k < N; k++)
350 v += cij[k * N + i] * tmp[k * N + j];
351 /* Do not bias when rounding */
352 int vq;
353 if (v >= 0)
354 vq = (int)( v * correction * 127 + 0.5);
355 else
356 vq = -(int)(-v * correction * 127 + 0.5);
357 bank[i * N + j] = VLC_CLIP(vq, INT16_MIN, INT16_MAX);
360 //mtime_t mul_duration = mdate() - tmul_0;
361 //fprintf(stderr, "IDCT took %d ms\n", (int)(mul_duration / 1000));
363 free(workspace);
364 return VLC_SUCCESS;
367 static int Callback(vlc_object_t *object, char const *cmd,
368 vlc_value_t oldval, vlc_value_t newval, void *data)
370 filter_t *filter = (filter_t *)object;
371 filter_sys_t *sys = filter->p_sys;
372 VLC_UNUSED(cmd); VLC_UNUSED(oldval); VLC_UNUSED(data);
374 vlc_mutex_lock(&sys->cfg.lock);
375 sys->cfg.variance = newval.f_float;
376 vlc_mutex_unlock(&sys->cfg.lock);
378 return VLC_SUCCESS;
381 static int Open(vlc_object_t *object)
383 filter_t *filter = (filter_t *)object;
385 const vlc_chroma_description_t *chroma =
386 vlc_fourcc_GetChromaDescription(filter->fmt_in.video.i_chroma);
387 if (!chroma || chroma->plane_count < 3 || chroma->pixel_size != 1) {
388 msg_Err(filter, "Unsupported chroma (%4.4s)",
389 (char*)&(filter->fmt_in.video.i_chroma));
390 return VLC_EGENERIC;
393 filter_sys_t *sys = malloc(sizeof(*sys));
394 if (!sys)
395 return VLC_ENOMEM;
396 sys->is_uv_filtered = true;
397 sys->scale = -1;
398 sys->seed = URAND_SEED;
400 int cutoff_low = BANK_SIZE - var_InheritInteger(filter, CFG_PREFIX "period-max");
401 int cutoff_high= BANK_SIZE - var_InheritInteger(filter, CFG_PREFIX "period-min");
402 cutoff_low = VLC_CLIP(cutoff_low, 1, BANK_SIZE - 1);
403 cutoff_high = VLC_CLIP(cutoff_high, 1, BANK_SIZE - 1);
404 if (Generate(sys->bank, cutoff_low, cutoff_high, cutoff_low, cutoff_high)) {
405 free(sys);
406 return VLC_EGENERIC;
409 sys->blend = BlockBlendC;
410 sys->emms = NULL;
411 #if defined(CAN_COMPILE_SSE2) && 1
412 if (vlc_CPU_SSE2()) {
413 sys->blend = BlockBlendSse2;
414 sys->emms = Emms;
416 #endif
418 vlc_mutex_init(&sys->cfg.lock);
419 sys->cfg.variance = var_CreateGetFloatCommand(filter, CFG_PREFIX "variance");
420 var_AddCallback(filter, CFG_PREFIX "variance", Callback, NULL);
422 filter->p_sys = sys;
423 filter->pf_video_filter = Filter;
424 return VLC_SUCCESS;
427 static void Close(vlc_object_t *object)
429 filter_t *filter = (filter_t *)object;
430 filter_sys_t *sys = filter->p_sys;
432 var_DelCallback(filter, CFG_PREFIX "variance", Callback, NULL);
433 vlc_mutex_destroy(&sys->cfg.lock);
434 free(sys);