mux:ts: convert vlc_tick_t to seconds explicitly using SEC_FROM_VLC_TICK()
[vlc.git] / modules / video_filter / grain.c
blob49e8024ebcf626815cd543d89d9044674611bdff
1 /*****************************************************************************
2 * grain.c: add film grain
3 *****************************************************************************
4 * Copyright (C) 2010 Laurent Aimar
5 * $Id$
7 * Authors: Laurent Aimar <fenrir _AT_ videolan _DOT_ org>
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU Lesser General Public License as published by
11 * the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 /*****************************************************************************
25 * Preamble
26 *****************************************************************************/
28 #ifdef HAVE_CONFIG_H
29 # include "config.h"
30 #endif
31 #include <assert.h>
32 #include <math.h>
34 #include <vlc_common.h>
35 #include <vlc_plugin.h>
36 #include <vlc_filter.h>
37 #include <vlc_cpu.h>
38 #include <vlc_picture.h>
39 #include <vlc_rand.h>
41 /*****************************************************************************
42 * Module descriptor
43 *****************************************************************************/
/* Module entry points, registered through set_callbacks() below. */
44 static int Open (vlc_object_t *);
45 static void Close(vlc_object_t *);
/* Side length of the square noise banks (BANK_SIZE * BANK_SIZE samples). */
47 #define BANK_SIZE (64)
/* Common prefix of every configuration variable of this module. */
49 #define CFG_PREFIX "grain-"
/* Allowed range and user-visible texts of the "variance" option. */
51 #define VARIANCE_MIN (0.0)
52 #define VARIANCE_MAX (10.0)
53 #define VARIANCE_TEXT N_("Variance")
54 #define VARIANCE_LONGTEXT N_("Variance of the gaussian noise")
/* Allowed range and user-visible texts of the "period-min"/"period-max"
 * options; the upper bound is tied to the bank size. */
56 #define PERIOD_MIN 1
57 #define PERIOD_MAX BANK_SIZE
58 #define PERIOD_MIN_TEXT N_("Minimal period")
59 #define PERIOD_MIN_LONGTEXT N_("Minimal period of the noise grain in pixel")
60 #define PERIOD_MAX_TEXT N_("Maximal period")
61 #define PERIOD_MAX_LONGTEXT N_("Maximal period of the noise grain in pixel")
/* Plugin description: a "video filter" module exposing three options:
 *  - grain-variance   (float,   default 2.0, within [VARIANCE_MIN, VARIANCE_MAX])
 *  - grain-period-min (integer, default 1,   within [PERIOD_MIN, PERIOD_MAX])
 *  - grain-period-max (integer, default 3*PERIOD_MAX/4, same range)
 * NOTE(review): the trailing `false` of each option and the `0` passed to
 * set_capability() are presumably the "advanced" flag and the module score;
 * confirm against vlc_plugin.h. */
63 vlc_module_begin()
64 set_description(N_("Grain video filter"))
65 set_shortname( N_("Grain"))
66 set_help(N_("Adds filtered gaussian noise"))
67 set_capability( "video filter", 0 )
68 set_category(CAT_VIDEO)
69 set_subcategory(SUBCAT_VIDEO_VFILTER)
70 add_float_with_range(CFG_PREFIX "variance", 2.0, VARIANCE_MIN, VARIANCE_MAX,
71 VARIANCE_TEXT, VARIANCE_LONGTEXT, false)
72 add_integer_with_range(CFG_PREFIX "period-min", 1, PERIOD_MIN, PERIOD_MAX,
73 PERIOD_MIN_TEXT, PERIOD_MIN_LONGTEXT, false)
74 add_integer_with_range(CFG_PREFIX "period-max", 3*PERIOD_MAX/4, PERIOD_MIN, PERIOD_MAX,
75 PERIOD_MAX_TEXT, PERIOD_MAX_LONGTEXT, false)
76 set_callbacks(Open, Close)
77 vlc_module_end()
79 /*****************************************************************************
80 * Local prototypes
81 *****************************************************************************/
83 #define BLEND_SIZE (8)
84 typedef struct
86 bool is_uv_filtered;
87 uint32_t seed;
89 int scale;
90 int16_t bank[BANK_SIZE * BANK_SIZE];
91 int16_t bank_y[BANK_SIZE * BANK_SIZE];
92 int16_t bank_uv[BANK_SIZE * BANK_SIZE];
94 void (*blend)(uint8_t *dst, size_t dst_pitch,
95 const uint8_t *src, size_t src_pitch,
96 const int16_t *noise);
97 void (*emms)(void);
99 struct {
100 vlc_mutex_t lock;
101 double variance;
102 } cfg;
103 } filter_sys_t;
/* Simple and *really fast* RNG (xorshift[13,17,5]) */
#define URAND_SEED (2463534242)

/**
 * Advances the 32-bit xorshift generator by one step.
 *
 * @param seed generator state, updated in place
 * @return the new state, which is also the generated value
 */
static uint32_t urand(uint32_t *seed)
{
    uint32_t state = *seed;

    state ^= state << 13;
    state ^= state >> 17;
    state ^= state << 5;

    *seed = state;
    return state;
}
/**
 * Draws one value from urand() and maps it to a double by dividing by
 * UINT32_MAX, i.e. onto the [0, 1] interval.
 *
 * @param seed generator state, advanced by one step
 */
static double drand(uint32_t *seed)
{
    const uint32_t raw = urand(seed);

    return raw / (double)UINT32_MAX;
}
/**
 * Produces two gaussian random values (mean 0, variance 1) at once.
 *
 * Points are drawn uniformly in the [-1, 1] square until one falls strictly
 * inside the unit disc; both coordinates are then rescaled by
 * sqrt(-2 * ln(s) / s), s being the squared distance to the origin.
 *
 * @param r1   receives the first value
 * @param r2   receives the second value
 * @param seed generator state, advanced by two steps per attempt
 */
static void grand(double *r1, double *r2, uint32_t *seed)
{
    double a, b, norm2;

    for (;;) {
        a = 2 * drand(seed) - 1;
        b = 2 * drand(seed) - 1;
        norm2 = a * a + b * b;
        if (!(norm2 >= 1.0))
            break;
    }

    const double factor = sqrt(-2 * log(norm2) / norm2);
    *r1 = a * factor;
    *r2 = b * factor;
}
136 static void BlockBlend(uint8_t *dst, size_t dst_pitch,
137 const uint8_t *src, size_t src_pitch,
138 const int16_t *noise,
139 int w, int h)
141 for (int y = 0; y < h; y++) {
142 for (int x = 0; x < w; x++) {
143 dst[y * dst_pitch + x] =
144 clip_uint8_vlc(src[y * src_pitch + x] + noise[y * BANK_SIZE +x]);
149 static void BlockBlendC(uint8_t *dst, size_t dst_pitch,
150 const uint8_t *src, size_t src_pitch,
151 const int16_t *noise)
153 BlockBlend(dst, dst_pitch, src, src_pitch, noise,
154 BLEND_SIZE, BLEND_SIZE);
157 #ifdef CAN_COMPILE_SSE2
158 #define STRING_EXPAND(x) #x
159 #define STRING(x) STRING_EXPAND(x)
160 VLC_SSE
161 static void BlockBlendSse2(uint8_t *dst, size_t dst_pitch,
162 const uint8_t *src, size_t src_pitch,
163 const int16_t *noise)
165 #if BLEND_SIZE == 8
166 /* TODO It is possible to do the math on 8 bits using
167 * paddusb X and then psubusb -X.
169 asm volatile ("pxor %%xmm0, %%xmm0\n" : :);
170 for (int i = 0; i < 8/2; i++) {
171 asm volatile (
172 "movq (%[src1]), %%xmm1\n"
173 "movq (%[src2]), %%xmm3\n"
174 "movdqu (%[noise]), %%xmm2\n"
175 "movdqu 2*"STRING(BANK_SIZE)"(%[noise]), %%xmm4\n"
177 "punpcklbw %%xmm0, %%xmm1\n"
178 "punpcklbw %%xmm0, %%xmm3\n"
180 "paddsw %%xmm2, %%xmm1\n"
181 "paddsw %%xmm4, %%xmm3\n"
182 "packuswb %%xmm1, %%xmm1\n"
183 "packuswb %%xmm3, %%xmm3\n"
184 "movq %%xmm1, (%[dst1])\n"
185 "movq %%xmm3, (%[dst2])\n"
186 : : [dst1]"r"(&dst[(2*i+0) * dst_pitch]),
187 [dst2]"r"(&dst[(2*i+1) * dst_pitch]),
188 [src1]"r"(&src[(2*i+0) * src_pitch]),
189 [src2]"r"(&src[(2*i+1) * src_pitch]),
190 [noise]"r"(&noise[2*i * BANK_SIZE])
191 : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "memory");
193 #else
194 # error "BLEND_SIZE unsupported"
195 #endif
197 static void Emms(void)
199 asm volatile ("emms");
201 #endif
204 * Scale the given signed data (on 7 bits + 1 for sign) using scale on 8 bits.
206 static void Scale(int16_t *dst, int16_t *src, int scale)
208 const int N = BANK_SIZE;
209 const int shift = 7 + 8;
211 for (int y = 0; y < N; y++) {
212 for (int x = 0; x < N; x++) {
213 const int v = src[y * N + x];
214 int vq;
215 if (v >= 0)
216 vq = ( v * scale + (1 << (shift-1)) - 1) >> shift;
217 else
218 vq = -((-v * scale + (1 << (shift-1)) - 1) >> shift);
219 dst[y * N + x] = vq;
224 static void PlaneFilter(filter_t *filter,
225 plane_t *dst, const plane_t *src,
226 int16_t *bank, uint32_t *seed)
228 filter_sys_t *sys = filter->p_sys;
230 for (int y = 0; y < dst->i_visible_lines; y += BLEND_SIZE) {
231 for (int x = 0; x < dst->i_visible_pitch; x += BLEND_SIZE) {
232 int bx = urand(seed) % (BANK_SIZE - BLEND_SIZE + 1);
233 int by = urand(seed) % (BANK_SIZE - BLEND_SIZE + 1);
234 const int16_t *noise = &bank[by * BANK_SIZE + bx];
236 int w = dst->i_visible_pitch - x;
237 int h = dst->i_visible_lines - y;
239 const uint8_t *srcp = &src->p_pixels[y * src->i_pitch + x];
240 uint8_t *dstp = &dst->p_pixels[y * dst->i_pitch + x];
242 if (w >= BLEND_SIZE && h >= BLEND_SIZE)
243 sys->blend(dstp, dst->i_pitch, srcp, src->i_pitch, noise);
244 else
245 BlockBlend(dstp, dst->i_pitch, srcp, src->i_pitch, noise,
246 __MIN(w, BLEND_SIZE), __MIN(h, BLEND_SIZE));
249 if (sys->emms)
250 sys->emms();
253 static picture_t *Filter(filter_t *filter, picture_t *src)
255 filter_sys_t *sys = filter->p_sys;
257 picture_t *dst = filter_NewPicture(filter);
258 if (!dst) {
259 picture_Release(src);
260 return NULL;
263 vlc_mutex_lock(&sys->cfg.lock);
264 const double variance = VLC_CLIP(sys->cfg.variance, VARIANCE_MIN, VARIANCE_MAX);
265 vlc_mutex_unlock(&sys->cfg.lock);
267 const int scale = 256 * sqrt(variance);
268 if (scale != sys->scale) {
269 sys->scale = scale;
270 Scale(sys->bank_y, sys->bank, sys->scale);
271 Scale(sys->bank_uv, sys->bank, sys->scale / 2);
274 for (int i = 0; i < dst->i_planes; i++) {
275 const plane_t *srcp = &src->p[i];
276 plane_t *dstp = &dst->p[i];
278 if (i == 0 || sys->is_uv_filtered) {
279 int16_t *bank = i == 0 ? sys->bank_y :
280 sys->bank_uv;
281 PlaneFilter(filter, dstp, srcp, bank, &sys->seed);
283 else {
284 plane_CopyPixels(dstp, srcp);
288 picture_CopyProperties(dst, src);
289 picture_Release(src);
290 return dst;
294 * Generate a filteried gaussian noise within [-127, 127] range.
296 static int Generate(int16_t *bank, int h_min, int h_max, int v_min, int v_max)
298 const int N = BANK_SIZE;
299 double *workspace = calloc(3 * N * N, sizeof(*workspace));
300 if (!workspace)
301 return VLC_ENOMEM;
303 double *gn = &workspace[0 * N * N];
304 double *cij = &workspace[1 * N * N];
305 double *tmp = &workspace[2 * N * N];
307 /* Create a gaussian noise matrix */
308 assert((N % 2) == 0);
309 uint32_t seed = URAND_SEED;
310 for (int y = 0; y < N; y++) {
311 for (int x = 0; x < N/2; x++) {
312 grand(&gn[y * N + 2 * x + 0], &gn[y * N + 2 * x + 1], &seed);
316 /* Clear non selected frequency.
317 * Only the central band is kept */
318 int zero = 0;
319 for (int y = 0; y < N; y++) {
320 for (int x = 0; x < N; x++) {
321 if ((x < h_min && y < v_min) || x > h_max || y > v_max) {
322 gn[y * N + x] = 0.0;
323 zero++;
327 const double correction = sqrt((double)N * N / (N * N - zero));
329 /* Filter the gaussian noise using an IDCT
330 * The algo is simple/stupid and does C * GN * Ct */
331 for (int i = 0; i < N; i++) {
332 for (int j = 0; j < N; j++) {
333 cij[i * N + j] = i == 0 ? sqrt(1.0f / N) :
334 sqrt(2.0f / N) * cos((2 * j + 1) * i * M_PI / 2 / N);
338 //vlc_tick_t tmul_0 = vlc_tick_now();
339 for (int i = 0; i < N; i++) {
340 for (int j = 0; j < N; j++) {
341 double v = 0.0;
342 for (int k = 0; k < N; k++)
343 v += gn[i * N + k] * cij[k * N + j];
344 tmp[i * N + j] = v;
347 for (int i = 0; i < N; i++) {
348 for (int j = 0; j < N; j++) {
349 double v = 0.0;
350 for (int k = 0; k < N; k++)
351 v += cij[k * N + i] * tmp[k * N + j];
352 /* Do not bias when rounding */
353 int vq;
354 if (v >= 0)
355 vq = (int)( v * correction * 127 + 0.5);
356 else
357 vq = -(int)(-v * correction * 127 + 0.5);
358 bank[i * N + j] = VLC_CLIP(vq, INT16_MIN, INT16_MAX);
361 //vlc_tick_t mul_duration = vlc_tick_now() - tmul_0;
362 //fprintf(stderr, "IDCT took %d ms\n", (int)(mul_duration / 1000));
364 free(workspace);
365 return VLC_SUCCESS;
368 static int Callback(vlc_object_t *object, char const *cmd,
369 vlc_value_t oldval, vlc_value_t newval, void *data)
371 filter_t *filter = (filter_t *)object;
372 filter_sys_t *sys = filter->p_sys;
373 VLC_UNUSED(cmd); VLC_UNUSED(oldval); VLC_UNUSED(data);
375 vlc_mutex_lock(&sys->cfg.lock);
376 sys->cfg.variance = newval.f_float;
377 vlc_mutex_unlock(&sys->cfg.lock);
379 return VLC_SUCCESS;
382 static int Open(vlc_object_t *object)
384 filter_t *filter = (filter_t *)object;
386 const vlc_chroma_description_t *chroma =
387 vlc_fourcc_GetChromaDescription(filter->fmt_in.video.i_chroma);
388 if (!chroma || chroma->plane_count < 3 || chroma->pixel_size != 1) {
389 msg_Err(filter, "Unsupported chroma (%4.4s)",
390 (char*)&(filter->fmt_in.video.i_chroma));
391 return VLC_EGENERIC;
394 filter_sys_t *sys = malloc(sizeof(*sys));
395 if (!sys)
396 return VLC_ENOMEM;
397 sys->is_uv_filtered = true;
398 sys->scale = -1;
399 sys->seed = URAND_SEED;
401 int cutoff_low = BANK_SIZE - var_InheritInteger(filter, CFG_PREFIX "period-max");
402 int cutoff_high= BANK_SIZE - var_InheritInteger(filter, CFG_PREFIX "period-min");
403 cutoff_low = VLC_CLIP(cutoff_low, 1, BANK_SIZE - 1);
404 cutoff_high = VLC_CLIP(cutoff_high, 1, BANK_SIZE - 1);
405 if (Generate(sys->bank, cutoff_low, cutoff_high, cutoff_low, cutoff_high)) {
406 free(sys);
407 return VLC_EGENERIC;
410 sys->blend = BlockBlendC;
411 sys->emms = NULL;
412 #if defined(CAN_COMPILE_SSE2) && 1
413 if (vlc_CPU_SSE2()) {
414 sys->blend = BlockBlendSse2;
415 sys->emms = Emms;
417 #endif
419 vlc_mutex_init(&sys->cfg.lock);
420 sys->cfg.variance = var_CreateGetFloatCommand(filter, CFG_PREFIX "variance");
421 var_AddCallback(filter, CFG_PREFIX "variance", Callback, NULL);
423 filter->p_sys = sys;
424 filter->pf_video_filter = Filter;
425 return VLC_SUCCESS;
428 static void Close(vlc_object_t *object)
430 filter_t *filter = (filter_t *)object;
431 filter_sys_t *sys = filter->p_sys;
433 var_DelCallback(filter, CFG_PREFIX "variance", Callback, NULL);
434 vlc_mutex_destroy(&sys->cfg.lock);
435 free(sys);