WinGui: Fix another instance of the Caliburn vs Json.net sillyness where objects...
[HandBrake.git] / libhb / nlmeans.c
bloba306c8f4add66612ea86faa45263b04486e87b34
1 /* nlmeans.c
3 Copyright (c) 2013 Dirk Farin
4 Copyright (c) 2003-2015 HandBrake Team
5 This file is part of the HandBrake source code
6 Homepage: <http://handbrake.fr/>.
7 It may be used under the terms of the GNU General Public License v2.
8 For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
9 */
11 /* Usage
13 * Parameters:
14 * lumaY_strength : lumaY_origin_tune : lumaY_patch_size : lumaY_range : lumaY_frames : lumaY_prefilter :
15 * chromaB_strength : chromaB_origin_tune : chromaB_patch_size : chromaB_range : chromaB_frames : chromaB_prefilter :
16 * chromaR_strength : chromaR_origin_tune : chromaR_patch_size : chromaR_range : chromaR_frames : chromaR_prefilter
18 * Defaults:
19 * 8:1:7:3:2:0 for each channel (equivalent to 8:1:7:3:2:0:8:1:7:3:2:0:8:1:7:3:2:0)
21 * Parameters cascade, e.g. 6:0.8:7:3:3:0:4:1 sets:
22 * strength 6, origin tune 0.8 for luma
23 * patch size 7, range 3, frames 3, prefilter 0 for all channels
24 * strength 4, origin tune 1 for both chroma channels
26 * Strength is relative and must be adjusted; ALL parameters affect overall strength.
27 * Lower origin tune improves results for noisier input or animation (film 0.5-1, animation 0.15-0.5).
28 * Large patch size (>9) may greatly reduce quality by clobbering detail.
29 * Larger search range increases quality; however, computation time increases exponentially.
30 * Large number of frames (film >3, animation >6) may cause temporal smearing.
31 * Prefiltering can potentially improve weight decisions, yielding better results for difficult sources.
33 * Prefilter enum combos:
34 * 1: Mean 3x3
35 * 2: Mean 5x5
36 * 3: Mean 5x5 (overrides Mean 3x3)
37 * 257: Mean 3x3 reduced by 25%
38 * 258: Mean 5x5 reduced by 25%
39 * 513: Mean 3x3 reduced by 50%
40 * 514: Mean 5x5 reduced by 50%
41 * 769: Mean 3x3 reduced by 75%
42 * 770: Mean 5x5 reduced by 75%
43 * 1025: Mean 3x3 plus edge boost (restores lost edge detail)
44 * 1026: Mean 5x5 plus edge boost
45 * 1281: Mean 3x3 reduced by 25% plus edge boost
46 * etc...
47 * 2049: Mean 3x3 passthru (NLMeans off, prefilter is the output)
48 * etc...
49 * 3329: Mean 3x3 reduced by 25% plus edge boost, passthru
50 * etc...
53 #include "hb.h"
54 #include "hbffmpeg.h"
55 #include "taskset.h"
56 #include "nlmeans.h"
58 #define NLMEANS_STRENGTH_LUMA_DEFAULT 6
59 #define NLMEANS_STRENGTH_CHROMA_DEFAULT 6
60 #define NLMEANS_ORIGIN_TUNE_LUMA_DEFAULT 1
61 #define NLMEANS_ORIGIN_TUNE_CHROMA_DEFAULT 1
62 #define NLMEANS_PATCH_SIZE_LUMA_DEFAULT 7
63 #define NLMEANS_PATCH_SIZE_CHROMA_DEFAULT 7
64 #define NLMEANS_RANGE_LUMA_DEFAULT 3
65 #define NLMEANS_RANGE_CHROMA_DEFAULT 3
66 #define NLMEANS_FRAMES_LUMA_DEFAULT 2
67 #define NLMEANS_FRAMES_CHROMA_DEFAULT 2
68 #define NLMEANS_PREFILTER_LUMA_DEFAULT 0
69 #define NLMEANS_PREFILTER_CHROMA_DEFAULT 0
71 #define NLMEANS_PREFILTER_MODE_MEAN3X3 1
72 #define NLMEANS_PREFILTER_MODE_MEAN5X5 2
73 #define NLMEANS_PREFILTER_MODE_MEDIAN3X3 4
74 #define NLMEANS_PREFILTER_MODE_MEDIAN5X5 8
75 #define NLMEANS_PREFILTER_MODE_RESERVED16 16 // Reserved
76 #define NLMEANS_PREFILTER_MODE_RESERVED32 32 // Reserved
77 #define NLMEANS_PREFILTER_MODE_RESERVED64 64 // Reserved
78 #define NLMEANS_PREFILTER_MODE_RESERVED128 128 // Reserved
79 #define NLMEANS_PREFILTER_MODE_REDUCE25 256
80 #define NLMEANS_PREFILTER_MODE_REDUCE50 512
81 #define NLMEANS_PREFILTER_MODE_EDGEBOOST 1024
82 #define NLMEANS_PREFILTER_MODE_PASSTHRU 2048
84 #define NLMEANS_SORT(a,b) { if (a > b) NLMEANS_SWAP(a, b); }
85 #define NLMEANS_SWAP(a,b) { a = (a ^ b); b = (a ^ b); a = (b ^ a); }
87 #define NLMEANS_FRAMES_MAX 32
88 #define NLMEANS_EXPSIZE 128
90 typedef struct
92 uint8_t *mem;
93 uint8_t *mem_pre;
94 uint8_t *image;
95 uint8_t *image_pre;
96 int w;
97 int h;
98 int border;
99 hb_lock_t *mutex;
100 int prefiltered;
101 } BorderedPlane;
103 typedef struct
105 int width;
106 int height;
107 int fmt;
108 BorderedPlane plane[3];
109 hb_buffer_settings_t s;
110 } Frame;
112 struct PixelSum
114 float weight_sum;
115 float pixel_sum;
118 typedef struct
120 hb_filter_private_t *pv;
121 int segment;
122 hb_buffer_t *out;
123 } nlmeans_thread_arg_t;
125 struct hb_filter_private_s
127 double strength[3]; // averaging weight decay, larger produces smoother output
128 double origin_tune[3]; // weight tuning for origin patch, 0.00..1.00
129 int patch_size[3]; // pixel context region width (must be odd)
130 int range[3]; // spatial search window width (must be odd)
131 int nframes[3]; // temporal search depth in frames
132 int prefilter[3]; // prefilter mode, can improve weight analysis
134 float exptable[3][NLMEANS_EXPSIZE];
135 float weight_fact_table[3];
136 int diff_max[3];
138 NLMeansFunctions functions;
140 Frame *frame;
141 int next_frame;
142 int max_frames;
144 taskset_t taskset;
145 int thread_count;
146 nlmeans_thread_arg_t **thread_data;
149 static int nlmeans_init(hb_filter_object_t *filter, hb_filter_init_t *init);
150 static int nlmeans_work(hb_filter_object_t *filter,
151 hb_buffer_t **buf_in,
152 hb_buffer_t **buf_out);
153 static void nlmeans_close(hb_filter_object_t *filter);
155 static void nlmeans_filter_thread(void *thread_args_v);
157 hb_filter_object_t hb_filter_nlmeans =
159 .id = HB_FILTER_NLMEANS,
160 .enforce_order = 1,
161 .name = "Denoise (nlmeans)",
162 .settings = NULL,
163 .init = nlmeans_init,
164 .work = nlmeans_work,
165 .close = nlmeans_close,
168 static void nlmeans_border(uint8_t *src,
169 const int w,
170 const int h,
171 const int border)
173 const int bw = w + 2 * border;
174 uint8_t *image = src + border + bw * border;
176 // Create faux borders using edge pixels
177 for (int y = 0; y < h; y++)
179 for (int x = 0; x < border; x++)
181 *(image + y*bw - x - 1) = *(image + y*bw + x);
182 *(image + y*bw + x + w) = *(image + y*bw - x + (w-1));
185 for (int y = 0; y < border; y++)
187 memcpy(image - border - (y+1)*bw, image - border + y*bw, bw);
188 memcpy(image - border + (y+h)*bw, image - border + (h-y-1)*bw, bw);
193 static void nlmeans_deborder(const BorderedPlane *src,
194 uint8_t *dst,
195 const int w,
196 const int s,
197 const int h)
199 const int bw = src->w + 2 * src->border;
200 uint8_t *image = src->mem + src->border + bw * src->border;
202 int width = w;
203 if (src->w < width)
205 width = src->w;
208 // Copy main image
209 for (int y = 0; y < h; y++)
211 memcpy(dst + y * s, image + y * bw, width);
216 static void nlmeans_alloc(const uint8_t *src,
217 const int src_w,
218 const int src_s,
219 const int src_h,
220 BorderedPlane *dst,
221 const int border)
223 const int bw = src_w + 2 * border;
224 const int bh = src_h + 2 * border;
226 uint8_t *mem = malloc(bw * bh * sizeof(uint8_t));
227 uint8_t *image = mem + border + bw * border;
229 // Copy main image
230 for (int y = 0; y < src_h; y++)
232 memcpy(image + y * bw, src + y * src_s, src_w);
235 dst->mem = mem;
236 dst->image = image;
237 dst->w = src_w;
238 dst->h = src_h;
239 dst->border = border;
241 nlmeans_border(dst->mem, dst->w, dst->h, dst->border);
242 dst->mem_pre = dst->mem;
243 dst->image_pre = dst->image;
247 static void nlmeans_filter_mean(const uint8_t *src,
248 uint8_t *dst,
249 const int w,
250 const int h,
251 const int border,
252 const int size)
255 // Mean filter
256 const int bw = w + 2 * border;
257 const int offset_min = -((size - 1) /2);
258 const int offset_max = (size + 1) /2;
259 const double pixel_weight = 1.0 / (size * size);
260 uint16_t pixel_sum;
261 for (int y = 0; y < h; y++)
263 for (int x = 0; x < w; x++)
265 pixel_sum = 0;
266 for (int k = offset_min; k < offset_max; k++)
268 for (int j = offset_min; j < offset_max; j++)
270 pixel_sum = pixel_sum + *(src + bw*(y+j) + (x+k));
273 *(dst + bw*y + x) = (uint8_t)(pixel_sum * pixel_weight);
279 static uint8_t nlmeans_filter_median_opt(uint8_t *pixels,
280 const int size)
283 // Optimized sorting networks
284 if (size == 3)
286 /* opt_med9() via Nicolas Devillard
287 * http://ndevilla.free.fr/median/median.pdf
289 NLMEANS_SORT(pixels[1], pixels[2]); NLMEANS_SORT(pixels[4], pixels[5]); NLMEANS_SORT(pixels[7], pixels[8]);
290 NLMEANS_SORT(pixels[0], pixels[1]); NLMEANS_SORT(pixels[3], pixels[4]); NLMEANS_SORT(pixels[6], pixels[7]);
291 NLMEANS_SORT(pixels[1], pixels[2]); NLMEANS_SORT(pixels[4], pixels[5]); NLMEANS_SORT(pixels[7], pixels[8]);
292 NLMEANS_SORT(pixels[0], pixels[3]); NLMEANS_SORT(pixels[5], pixels[8]); NLMEANS_SORT(pixels[4], pixels[7]);
293 NLMEANS_SORT(pixels[3], pixels[6]); NLMEANS_SORT(pixels[1], pixels[4]); NLMEANS_SORT(pixels[2], pixels[5]);
294 NLMEANS_SORT(pixels[4], pixels[7]); NLMEANS_SORT(pixels[4], pixels[2]); NLMEANS_SORT(pixels[6], pixels[4]);
295 NLMEANS_SORT(pixels[4], pixels[2]);
296 return pixels[4];
298 else if (size == 5)
300 /* opt_med25() via Nicolas Devillard
301 * http://ndevilla.free.fr/median/median.pdf
303 NLMEANS_SORT(pixels[0], pixels[1]); NLMEANS_SORT(pixels[3], pixels[4]); NLMEANS_SORT(pixels[2], pixels[4]);
304 NLMEANS_SORT(pixels[2], pixels[3]); NLMEANS_SORT(pixels[6], pixels[7]); NLMEANS_SORT(pixels[5], pixels[7]);
305 NLMEANS_SORT(pixels[5], pixels[6]); NLMEANS_SORT(pixels[9], pixels[10]); NLMEANS_SORT(pixels[8], pixels[10]);
306 NLMEANS_SORT(pixels[8], pixels[9]); NLMEANS_SORT(pixels[12], pixels[13]); NLMEANS_SORT(pixels[11], pixels[13]);
307 NLMEANS_SORT(pixels[11], pixels[12]); NLMEANS_SORT(pixels[15], pixels[16]); NLMEANS_SORT(pixels[14], pixels[16]);
308 NLMEANS_SORT(pixels[14], pixels[15]); NLMEANS_SORT(pixels[18], pixels[19]); NLMEANS_SORT(pixels[17], pixels[19]);
309 NLMEANS_SORT(pixels[17], pixels[18]); NLMEANS_SORT(pixels[21], pixels[22]); NLMEANS_SORT(pixels[20], pixels[22]);
310 NLMEANS_SORT(pixels[20], pixels[21]); NLMEANS_SORT(pixels[23], pixels[24]); NLMEANS_SORT(pixels[2], pixels[5]);
311 NLMEANS_SORT(pixels[3], pixels[6]); NLMEANS_SORT(pixels[0], pixels[6]); NLMEANS_SORT(pixels[0], pixels[3]);
312 NLMEANS_SORT(pixels[4], pixels[7]); NLMEANS_SORT(pixels[1], pixels[7]); NLMEANS_SORT(pixels[1], pixels[4]);
313 NLMEANS_SORT(pixels[11], pixels[14]); NLMEANS_SORT(pixels[8], pixels[14]); NLMEANS_SORT(pixels[8], pixels[11]);
314 NLMEANS_SORT(pixels[12], pixels[15]); NLMEANS_SORT(pixels[9], pixels[15]); NLMEANS_SORT(pixels[9], pixels[12]);
315 NLMEANS_SORT(pixels[13], pixels[16]); NLMEANS_SORT(pixels[10], pixels[16]); NLMEANS_SORT(pixels[10], pixels[13]);
316 NLMEANS_SORT(pixels[20], pixels[23]); NLMEANS_SORT(pixels[17], pixels[23]); NLMEANS_SORT(pixels[17], pixels[20]);
317 NLMEANS_SORT(pixels[21], pixels[24]); NLMEANS_SORT(pixels[18], pixels[24]); NLMEANS_SORT(pixels[18], pixels[21]);
318 NLMEANS_SORT(pixels[19], pixels[22]); NLMEANS_SORT(pixels[8], pixels[17]); NLMEANS_SORT(pixels[9], pixels[18]);
319 NLMEANS_SORT(pixels[0], pixels[18]); NLMEANS_SORT(pixels[0], pixels[9]); NLMEANS_SORT(pixels[10], pixels[19]);
320 NLMEANS_SORT(pixels[1], pixels[19]); NLMEANS_SORT(pixels[1], pixels[10]); NLMEANS_SORT(pixels[11], pixels[20]);
321 NLMEANS_SORT(pixels[2], pixels[20]); NLMEANS_SORT(pixels[2], pixels[11]); NLMEANS_SORT(pixels[12], pixels[21]);
322 NLMEANS_SORT(pixels[3], pixels[21]); NLMEANS_SORT(pixels[3], pixels[12]); NLMEANS_SORT(pixels[13], pixels[22]);
323 NLMEANS_SORT(pixels[4], pixels[22]); NLMEANS_SORT(pixels[4], pixels[13]); NLMEANS_SORT(pixels[14], pixels[23]);
324 NLMEANS_SORT(pixels[5], pixels[23]); NLMEANS_SORT(pixels[5], pixels[14]); NLMEANS_SORT(pixels[15], pixels[24]);
325 NLMEANS_SORT(pixels[6], pixels[24]); NLMEANS_SORT(pixels[6], pixels[15]); NLMEANS_SORT(pixels[7], pixels[16]);
326 NLMEANS_SORT(pixels[7], pixels[19]); NLMEANS_SORT(pixels[13], pixels[21]); NLMEANS_SORT(pixels[15], pixels[23]);
327 NLMEANS_SORT(pixels[7], pixels[13]); NLMEANS_SORT(pixels[7], pixels[15]); NLMEANS_SORT(pixels[1], pixels[9]);
328 NLMEANS_SORT(pixels[3], pixels[11]); NLMEANS_SORT(pixels[5], pixels[17]); NLMEANS_SORT(pixels[11], pixels[17]);
329 NLMEANS_SORT(pixels[9], pixels[17]); NLMEANS_SORT(pixels[4], pixels[10]); NLMEANS_SORT(pixels[6], pixels[12]);
330 NLMEANS_SORT(pixels[7], pixels[14]); NLMEANS_SORT(pixels[4], pixels[6]); NLMEANS_SORT(pixels[4], pixels[7]);
331 NLMEANS_SORT(pixels[12], pixels[14]); NLMEANS_SORT(pixels[10], pixels[14]); NLMEANS_SORT(pixels[6], pixels[7]);
332 NLMEANS_SORT(pixels[10], pixels[12]); NLMEANS_SORT(pixels[6], pixels[10]); NLMEANS_SORT(pixels[6], pixels[17]);
333 NLMEANS_SORT(pixels[12], pixels[17]); NLMEANS_SORT(pixels[7], pixels[17]); NLMEANS_SORT(pixels[7], pixels[10]);
334 NLMEANS_SORT(pixels[12], pixels[18]); NLMEANS_SORT(pixels[7], pixels[12]); NLMEANS_SORT(pixels[10], pixels[18]);
335 NLMEANS_SORT(pixels[12], pixels[20]); NLMEANS_SORT(pixels[10], pixels[20]); NLMEANS_SORT(pixels[10], pixels[12]);
336 return pixels[12];
339 // Network for size not implemented
340 return pixels[(int)((size * size)/2)];
344 static void nlmeans_filter_median(const uint8_t *src,
345 uint8_t *dst,
346 const int w,
347 const int h,
348 const int border,
349 const int size)
351 // Median filter
352 const int bw = w + 2 * border;
353 const int offset_min = -((size - 1) /2);
354 const int offset_max = (size + 1) /2;
355 int index;
356 uint8_t pixels[size * size];
357 for (int y = 0; y < h; y++)
359 for (int x = 0; x < w; x++)
361 index = 0;
362 for (int k = offset_min; k < offset_max; k++)
364 for (int j = offset_min; j < offset_max; j++)
366 pixels[index] = *(src + bw*(y+j) + (x+k));
367 index++;
370 *(dst + bw*y + x) = nlmeans_filter_median_opt(pixels, size);
376 static void nlmeans_filter_edgeboost(const uint8_t *src,
377 uint8_t *dst,
378 const int w,
379 const int h,
380 const int border)
382 const int bw = w + 2 * border;
383 const int bh = h + 2 * border;
385 // Custom kernel
386 const int kernel_size = 3;
387 const int kernel[3][3] = {{-31, 0, 31},
388 {-44, 0, 44},
389 {-31, 0, 31}};
390 const double kernel_coef = 1.0 / 126.42;
392 // Detect edges
393 const int offset_min = -((kernel_size - 1) /2);
394 const int offset_max = (kernel_size + 1) /2;
395 uint16_t pixel1;
396 uint16_t pixel2;
397 uint8_t *mask_mem = calloc(bw * bh, sizeof(uint8_t));
398 uint8_t *mask = mask_mem + border + bw * border;
399 for (int y = 0; y < h; y++)
401 for (int x = 0; x < w; x++)
403 pixel1 = 0;
404 pixel2 = 0;
405 for (int k = offset_min; k < offset_max; k++)
407 for (int j = offset_min; j < offset_max; j++)
409 pixel1 += kernel[j+1][k+1] * *(src + bw*(y+j) + (x+k));
410 pixel2 += kernel[k+1][j+1] * *(src + bw*(y+j) + (x+k));
413 pixel1 = pixel1 > 0 ? pixel1 : -pixel1;
414 pixel2 = pixel2 > 0 ? pixel2 : -pixel2;
415 pixel1 = (uint16_t)(((double)pixel1 * kernel_coef) + 128);
416 pixel2 = (uint16_t)(((double)pixel2 * kernel_coef) + 128);
417 *(mask + bw*y + x) = (uint8_t)(pixel1 + pixel2);
418 if (*(mask + bw*y + x) > 160)
420 *(mask + bw*y + x) = 235;
422 else if (*(mask + bw*y + x) > 16)
424 *(mask + bw*y + x) = 128;
426 else
428 *(mask + bw*y + x) = 16;
433 // Post-process and output
434 int pixels;
435 for (int y = 0; y < h; y++)
437 for (int x = 0; x < w; x++)
439 if (*(mask + bw*y + x) > 16)
441 // Count nearby edge pixels
442 pixels = 0;
443 for (int k = offset_min; k < offset_max; k++)
445 for (int j = offset_min; j < offset_max; j++)
447 if (*(mask + bw*(y+j) + (x+k)) > 16)
449 pixels++;
453 // Remove false positive
454 if (pixels < 3)
456 *(mask + bw*y + x) = 16;
458 // Filter output
459 if (*(mask + bw*y + x) > 16)
461 if (*(mask + bw*y + x) == 235)
463 *(dst + bw*y + x) = (3 * *(src + bw*y + x) + 1 * *(dst + bw*y + x)) /4;
465 else
467 *(dst + bw*y + x) = (2 * *(src + bw*y + x) + 3 * *(dst + bw*y + x)) /5;
469 //*(dst + bw*y + x) = *(mask + bw*y + x); // Overlay mask
472 //*(dst + bw*y + x) = *(mask + bw*y + x); // Full mask
476 free(mask_mem);
479 static void nlmeans_prefilter(BorderedPlane *src,
480 const int filter_type)
482 hb_lock(src->mutex);
483 if (src->prefiltered)
485 hb_unlock(src->mutex);
486 return;
489 if (filter_type & NLMEANS_PREFILTER_MODE_MEAN3X3 ||
490 filter_type & NLMEANS_PREFILTER_MODE_MEAN5X5 ||
491 filter_type & NLMEANS_PREFILTER_MODE_MEDIAN3X3 ||
492 filter_type & NLMEANS_PREFILTER_MODE_MEDIAN5X5)
495 // Source image
496 const uint8_t *mem = src->mem;
497 const uint8_t *image = src->image;
498 const int border = src->border;
499 const int w = src->w;
500 const int h = src->h;
501 const int bw = w + 2 * border;
502 const int bh = h + 2 * border;
504 // Duplicate plane
505 uint8_t *mem_pre = malloc(bw * bh * sizeof(uint8_t));
506 uint8_t *image_pre = mem_pre + border + bw * border;
507 for (int y = 0; y < h; y++)
509 memcpy(mem_pre + y * bw, mem + y * bw, bw);
512 // Filter plane; should already have at least 2px extra border on each side
513 if (filter_type & NLMEANS_PREFILTER_MODE_MEDIAN5X5)
515 // Median 5x5
516 nlmeans_filter_median(image, image_pre, w, h, border, 5);
518 else if (filter_type & NLMEANS_PREFILTER_MODE_MEDIAN3X3)
520 // Median 3x3
521 nlmeans_filter_median(image, image_pre, w, h, border, 3);
523 else if (filter_type & NLMEANS_PREFILTER_MODE_MEAN5X5)
525 // Mean 5x5
526 nlmeans_filter_mean(image, image_pre, w, h, border, 5);
528 else if (filter_type & NLMEANS_PREFILTER_MODE_MEAN3X3)
530 // Mean 3x3
531 nlmeans_filter_mean(image, image_pre, w, h, border, 3);
534 // Restore edges
535 if (filter_type & NLMEANS_PREFILTER_MODE_EDGEBOOST)
537 nlmeans_filter_edgeboost(image, image_pre, w, h, border);
540 // Blend source and destination for lesser effect
541 int wet = 1;
542 int dry = 0;
543 if (filter_type & NLMEANS_PREFILTER_MODE_REDUCE50 &&
544 filter_type & NLMEANS_PREFILTER_MODE_REDUCE25)
546 wet = 1;
547 dry = 3;
549 else if (filter_type & NLMEANS_PREFILTER_MODE_REDUCE50)
551 wet = 1;
552 dry = 1;
554 else if (filter_type & NLMEANS_PREFILTER_MODE_REDUCE25)
556 wet = 3;
557 dry = 1;
559 if (dry > 0)
561 for (int y = 0; y < bh; y++)
563 for (int x = 0; x < bw; x++)
565 *(mem_pre + bw*y + x) = (uint8_t)((wet * *(mem_pre + bw*y + x) + dry * *(mem + bw*y + x)) / (wet + dry));
570 // Assign result
571 src->mem_pre = mem_pre;
572 src->image_pre = image_pre;
574 // Recreate borders
575 nlmeans_border(mem_pre, w, h, border);
578 src->prefiltered = 1;
579 hb_unlock(src->mutex);
582 static void build_integral_scalar(uint32_t *integral,
583 int integral_stride,
584 const uint8_t *src,
585 const uint8_t *src_pre,
586 const uint8_t *compare,
587 const uint8_t *compare_pre,
588 int w,
589 int border,
590 int dst_w,
591 int dst_h,
592 int dx,
593 int dy)
595 const int bw = w + 2 * border;
596 for (int y = 0; y < dst_h; y++)
598 const uint8_t *p1 = src_pre + y*bw;
599 const uint8_t *p2 = compare_pre + (y+dy)*bw + dx;
600 uint32_t *out = integral + (y*integral_stride);
602 for (int x = 0; x < dst_w; x++)
604 int diff = *p1 - *p2;
605 *out = *(out-1) + diff * diff;
606 out++;
607 p1++;
608 p2++;
611 if (y > 0)
613 out = integral + y*integral_stride;
615 for (int x = 0; x < dst_w; x++)
617 *out += *(out - integral_stride);
618 out++;
624 static void nlmeans_plane(NLMeansFunctions *functions,
625 Frame *frame,
626 int prefilter,
627 int plane,
628 int nframes,
629 uint8_t *dst,
630 int dst_w,
631 int dst_s,
632 int dst_h,
633 double h_param,
634 double origin_tune,
635 int n,
636 int r,
637 const float *exptable,
638 const float weight_fact_table,
639 const int diff_max)
641 const int n_half = (n-1) /2;
642 const int r_half = (r-1) /2;
644 // Source image
645 const uint8_t *src = frame[0].plane[plane].image;
646 const uint8_t *src_pre = frame[0].plane[plane].image_pre;
647 const int w = frame[0].plane[plane].w;
648 const int border = frame[0].plane[plane].border;
649 const int bw = w + 2 * border;
651 // Allocate temporary pixel sums
652 struct PixelSum *tmp_data = calloc(dst_w * dst_h, sizeof(struct PixelSum));
654 // Allocate integral image
655 const int integral_stride = ((dst_w + 15) / 16 * 16) + 2 * 16;
656 uint32_t* const integral_mem = calloc(integral_stride * (dst_h+1), sizeof(uint32_t));
657 uint32_t* const integral = integral_mem + integral_stride + 16;
659 // Iterate through available frames
660 for (int f = 0; f < nframes; f++)
662 nlmeans_prefilter(&frame[f].plane[plane], prefilter);
664 // Compare image
665 const uint8_t *compare = frame[f].plane[plane].image;
666 const uint8_t *compare_pre = frame[f].plane[plane].image_pre;
668 // Iterate through all displacements
669 for (int dy = -r_half; dy <= r_half; dy++)
671 for (int dx = -r_half; dx <= r_half; dx++)
674 // Apply special weight tuning to origin patch
675 if (dx == 0 && dy == 0 && f == 0)
677 // TODO: Parallelize this
678 for (int y = n_half; y < dst_h-n + n_half; y++)
680 for (int x = n_half; x < dst_w-n + n_half; x++)
682 tmp_data[y*dst_w + x].weight_sum += origin_tune;
683 tmp_data[y*dst_w + x].pixel_sum += origin_tune * src[y*bw + x];
686 continue;
689 // Build integral
690 functions->build_integral(integral,
691 integral_stride,
692 src,
693 src_pre,
694 compare,
695 compare_pre,
697 border,
698 dst_w,
699 dst_h,
701 dy);
703 // Average displacement
704 // TODO: Parallelize this
705 for (int y = 0; y <= dst_h-n; y++)
707 const uint32_t *integral_ptr1 = integral + (y -1)*integral_stride - 1;
708 const uint32_t *integral_ptr2 = integral + (y+n-1)*integral_stride - 1;
710 for (int x = 0; x <= dst_w-n; x++)
712 const int xc = x + n_half;
713 const int yc = y + n_half;
715 // Difference between patches
716 const int diff = (uint32_t)(integral_ptr2[n] - integral_ptr2[0] - integral_ptr1[n] + integral_ptr1[0]);
718 // Sum pixel with weight
719 if (diff < diff_max)
721 const int diffidx = diff * weight_fact_table;
723 //float weight = exp(-diff*weightFact);
724 const float weight = exptable[diffidx];
726 tmp_data[yc*dst_w + xc].weight_sum += weight;
727 tmp_data[yc*dst_w + xc].pixel_sum += weight * compare[(yc+dy)*bw + xc + dx];
730 integral_ptr1++;
731 integral_ptr2++;
738 // Copy edges
739 for (int y = 0; y < dst_h; y++)
741 for (int x = 0; x < n_half; x++)
743 *(dst + y * dst_s + x) = *(src + y * bw - x - 1);
744 *(dst + y * dst_s - x + (dst_w - 1)) = *(src + y * bw + x + dst_w);
747 for (int y = 0; y < n_half; y++)
749 memcpy(dst + y*dst_s, src - (y+1)*bw, dst_w);
750 memcpy(dst + (dst_h-y-1)*dst_s, src + (y+dst_h)*bw, dst_w);
753 // Copy main image
754 uint8_t result;
755 for (int y = n_half; y < dst_h-n_half; y++)
757 for (int x = n_half; x < dst_w-n_half; x++)
759 result = (uint8_t)(tmp_data[y*dst_w + x].pixel_sum / tmp_data[y*dst_w + x].weight_sum);
760 *(dst + y*dst_s + x) = result ? result : *(src + y*bw + x);
764 free(tmp_data);
765 free(integral_mem);
769 static int nlmeans_init(hb_filter_object_t *filter,
770 hb_filter_init_t *init)
772 filter->private_data = calloc(sizeof(struct hb_filter_private_s), 1);
773 hb_filter_private_t *pv = filter->private_data;
774 NLMeansFunctions *functions = &pv->functions;
776 functions->build_integral = build_integral_scalar;
777 #if defined(ARCH_X86)
778 nlmeans_init_x86(functions);
779 #endif
781 // Mark parameters unset
782 for (int c = 0; c < 3; c++)
784 pv->strength[c] = -1;
785 pv->origin_tune[c] = -1;
786 pv->patch_size[c] = -1;
787 pv->range[c] = -1;
788 pv->nframes[c] = -1;
789 pv->prefilter[c] = -1;
792 // Read user parameters
793 if (filter->settings != NULL)
795 sscanf(filter->settings, "%lf:%lf:%d:%d:%d:%d:%lf:%lf:%d:%d:%d:%d:%lf:%lf:%d:%d:%d:%d",
796 &pv->strength[0], &pv->origin_tune[0], &pv->patch_size[0], &pv->range[0], &pv->nframes[0], &pv->prefilter[0],
797 &pv->strength[1], &pv->origin_tune[1], &pv->patch_size[1], &pv->range[1], &pv->nframes[1], &pv->prefilter[1],
798 &pv->strength[2], &pv->origin_tune[2], &pv->patch_size[2], &pv->range[2], &pv->nframes[2], &pv->prefilter[2]);
801 // Cascade values
802 // Cr not set; inherit Cb. Cb not set; inherit Y. Y not set; defaults.
803 for (int c = 1; c < 3; c++)
805 if (pv->strength[c] == -1) { pv->strength[c] = pv->strength[c-1]; }
806 if (pv->origin_tune[c] == -1) { pv->origin_tune[c] = pv->origin_tune[c-1]; }
807 if (pv->patch_size[c] == -1) { pv->patch_size[c] = pv->patch_size[c-1]; }
808 if (pv->range[c] == -1) { pv->range[c] = pv->range[c-1]; }
809 if (pv->nframes[c] == -1) { pv->nframes[c] = pv->nframes[c-1]; }
810 if (pv->prefilter[c] == -1) { pv->prefilter[c] = pv->prefilter[c-1]; }
813 for (int c = 0; c < 3; c++)
815 // Replace unset values with defaults
816 if (pv->strength[c] == -1) { pv->strength[c] = c ? NLMEANS_STRENGTH_LUMA_DEFAULT : NLMEANS_STRENGTH_CHROMA_DEFAULT; }
817 if (pv->origin_tune[c] == -1) { pv->origin_tune[c] = c ? NLMEANS_ORIGIN_TUNE_LUMA_DEFAULT : NLMEANS_ORIGIN_TUNE_CHROMA_DEFAULT; }
818 if (pv->patch_size[c] == -1) { pv->patch_size[c] = c ? NLMEANS_PATCH_SIZE_LUMA_DEFAULT : NLMEANS_PATCH_SIZE_CHROMA_DEFAULT; }
819 if (pv->range[c] == -1) { pv->range[c] = c ? NLMEANS_RANGE_LUMA_DEFAULT : NLMEANS_RANGE_CHROMA_DEFAULT; }
820 if (pv->nframes[c] == -1) { pv->nframes[c] = c ? NLMEANS_FRAMES_LUMA_DEFAULT : NLMEANS_FRAMES_CHROMA_DEFAULT; }
821 if (pv->prefilter[c] == -1) { pv->prefilter[c] = c ? NLMEANS_PREFILTER_LUMA_DEFAULT : NLMEANS_PREFILTER_CHROMA_DEFAULT; }
823 // Sanitize
824 if (pv->strength[c] < 0) { pv->strength[c] = 0; }
825 if (pv->origin_tune[c] < 0.01) { pv->origin_tune[c] = 0.01; } // avoid black artifacts
826 if (pv->origin_tune[c] > 1) { pv->origin_tune[c] = 1; }
827 if (pv->patch_size[c] % 2 == 0) { pv->patch_size[c]--; }
828 if (pv->patch_size[c] < 1) { pv->patch_size[c] = 1; }
829 if (pv->range[c] % 2 == 0) { pv->range[c]--; }
830 if (pv->range[c] < 1) { pv->range[c] = 1; }
831 if (pv->nframes[c] < 1) { pv->nframes[c] = 1; }
832 if (pv->nframes[c] > NLMEANS_FRAMES_MAX) { pv->nframes[c] = NLMEANS_FRAMES_MAX; }
833 if (pv->prefilter[c] < 0) { pv->prefilter[c] = 0; }
835 if (pv->max_frames < pv->nframes[c]) pv->max_frames = pv->nframes[c];
837 // Precompute exponential table
838 float *exptable = &pv->exptable[c][0];
839 float *weight_fact_table = &pv->weight_fact_table[c];
840 int *diff_max = &pv->diff_max[c];
841 const float weight_factor = 1.0/pv->patch_size[c]/pv->patch_size[c] / (pv->strength[c] * pv->strength[c]);
842 const float min_weight_in_table = 0.0005;
843 const float stretch = NLMEANS_EXPSIZE / (-log(min_weight_in_table));
844 *(weight_fact_table) = weight_factor * stretch;
845 *(diff_max) = NLMEANS_EXPSIZE / *(weight_fact_table);
846 for (int i = 0; i < NLMEANS_EXPSIZE; i++)
848 exptable[i] = exp(-i/stretch);
850 exptable[NLMEANS_EXPSIZE-1] = 0;
853 pv->thread_count = hb_get_cpu_count();
854 pv->frame = calloc(pv->thread_count + pv->max_frames, sizeof(Frame));
855 for (int ii = 0; ii < pv->thread_count + pv->max_frames; ii++)
857 for (int c = 0; c < 3; c++)
859 pv->frame[ii].plane[c].mutex = hb_lock_init();
863 pv->thread_data = malloc(pv->thread_count * sizeof(nlmeans_thread_arg_t*));
864 if (taskset_init(&pv->taskset, pv->thread_count,
865 sizeof(nlmeans_thread_arg_t)) == 0)
867 hb_error("NLMeans could not initialize taskset");
868 goto fail;
871 for (int ii = 0; ii < pv->thread_count; ii++)
873 pv->thread_data[ii] = taskset_thread_args(&pv->taskset, ii);
874 if (pv->thread_data[ii] == NULL)
876 hb_error("NLMeans could not create thread args");
877 goto fail;
879 pv->thread_data[ii]->pv = pv;
880 pv->thread_data[ii]->segment = ii;
881 if (taskset_thread_spawn(&pv->taskset, ii, "nlmeans_filter",
882 nlmeans_filter_thread, HB_NORMAL_PRIORITY) == 0)
884 hb_error("NLMeans could not spawn thread");
885 goto fail;
889 return 0;
891 fail:
892 taskset_fini(&pv->taskset);
893 free(pv->thread_data);
894 free(pv);
895 return -1;
898 static void nlmeans_close(hb_filter_object_t *filter)
900 hb_filter_private_t *pv = filter->private_data;
902 if (pv == NULL)
904 return;
907 taskset_fini(&pv->taskset);
908 for (int c = 0; c < 3; c++)
910 for (int f = 0; f < pv->nframes[c]; f++)
912 if (pv->frame[f].plane[c].mem_pre != NULL &&
913 pv->frame[f].plane[c].mem_pre != pv->frame[f].plane[c].mem)
915 free(pv->frame[f].plane[c].mem_pre);
916 pv->frame[f].plane[c].mem_pre = NULL;
918 if (pv->frame[f].plane[c].mem != NULL)
920 free(pv->frame[f].plane[c].mem);
921 pv->frame[f].plane[c].mem = NULL;
926 for (int ii = 0; ii < pv->thread_count + pv->max_frames; ii++)
928 for (int c = 0; c < 3; c++)
930 hb_lock_close(&pv->frame[ii].plane[c].mutex);
934 free(pv->frame);
935 free(pv->thread_data);
936 free(pv);
937 filter->private_data = NULL;
940 static void nlmeans_filter_thread(void *thread_args_v)
942 nlmeans_thread_arg_t *thread_data = thread_args_v;
943 hb_filter_private_t *pv = thread_data->pv;
944 int segment = thread_data->segment;
946 hb_log("NLMeans thread started for segment %d", segment);
948 while (1)
950 // Wait until there is work to do.
951 taskset_thread_wait4start(&pv->taskset, segment);
953 if (taskset_thread_stop(&pv->taskset, segment))
955 break;
958 Frame *frame = &pv->frame[segment];
959 hb_buffer_t *buf;
960 buf = hb_frame_buffer_init(frame->fmt, frame->width, frame->height);
962 NLMeansFunctions *functions = &pv->functions;
964 for (int c = 0; c < 3; c++)
966 if (pv->strength[c] == 0)
968 nlmeans_deborder(&frame->plane[c], buf->plane[c].data,
969 buf->plane[c].width, buf->plane[c].stride,
970 buf->plane[c].height);
971 continue;
973 if (pv->prefilter[c] & NLMEANS_PREFILTER_MODE_PASSTHRU)
975 nlmeans_prefilter(&pv->frame->plane[c], pv->prefilter[c]);
976 nlmeans_deborder(&frame->plane[c], buf->plane[c].data,
977 buf->plane[c].width, buf->plane[c].stride,
978 buf->plane[c].height);
979 continue;
982 // Process current plane
983 nlmeans_plane(functions,
984 frame,
985 pv->prefilter[c],
987 pv->nframes[c],
988 buf->plane[c].data,
989 buf->plane[c].width,
990 buf->plane[c].stride,
991 buf->plane[c].height,
992 pv->strength[c],
993 pv->origin_tune[c],
994 pv->patch_size[c],
995 pv->range[c],
996 pv->exptable[c],
997 pv->weight_fact_table[c],
998 pv->diff_max[c]);
1000 buf->s = pv->frame[segment].s;
1001 thread_data->out = buf;
1003 // Finished this segment, notify.
1004 taskset_thread_complete(&pv->taskset, segment);
1006 taskset_thread_complete(&pv->taskset, segment);
1009 static void nlmeans_add_frame(hb_filter_private_t *pv, hb_buffer_t *buf)
1011 for (int c = 0; c < 3; c++)
1013 // Extend copy of plane with extra border and place in buffer
1014 const int border = ((pv->range[c] + 2) / 2 + 15) / 16 * 16;
1015 nlmeans_alloc(buf->plane[c].data,
1016 buf->plane[c].width,
1017 buf->plane[c].stride,
1018 buf->plane[c].height,
1019 &pv->frame[pv->next_frame].plane[c],
1020 border);
1021 pv->frame[pv->next_frame].s = buf->s;
1022 pv->frame[pv->next_frame].width = buf->f.width;
1023 pv->frame[pv->next_frame].height = buf->f.height;
1024 pv->frame[pv->next_frame].fmt = buf->f.fmt;
1026 pv->next_frame++;
1029 static hb_buffer_t * nlmeans_filter(hb_filter_private_t *pv)
1031 if (pv->next_frame < pv->max_frames + pv->thread_count)
1033 return NULL;
1036 taskset_cycle(&pv->taskset);
1038 // Free buffers that are not needed for next taskset cycle
1039 for (int c = 0; c < 3; c++)
1041 for (int t = 0; t < pv->thread_count; t++)
1043 // Release last frame in buffer
1044 if (pv->frame[t].plane[c].mem_pre != NULL &&
1045 pv->frame[t].plane[c].mem_pre != pv->frame[t].plane[c].mem)
1047 free(pv->frame[t].plane[c].mem_pre);
1048 pv->frame[t].plane[c].mem_pre = NULL;
1050 if (pv->frame[t].plane[c].mem != NULL)
1052 free(pv->frame[t].plane[c].mem);
1053 pv->frame[t].plane[c].mem = NULL;
1057 // Shift frames in buffer down
1058 for (int f = 0; f < pv->max_frames; f++)
1060 // Don't move the mutex!
1061 Frame frame = pv->frame[f];
1062 pv->frame[f] = pv->frame[f+pv->thread_count];
1063 for (int c = 0; c < 3; c++)
1065 pv->frame[f].plane[c].mutex = frame.plane[c].mutex;
1066 pv->frame[f+pv->thread_count].plane[c].mem_pre = NULL;
1067 pv->frame[f+pv->thread_count].plane[c].mem = NULL;
1070 pv->next_frame -= pv->thread_count;
1072 // Collect results from taskset
1073 hb_buffer_t *last = NULL, *out = NULL;
1074 for (int t = 0; t < pv->thread_count; t++)
1076 if (out == NULL)
1078 out = last = pv->thread_data[t]->out;
1080 else
1082 last->next = pv->thread_data[t]->out;
1083 last = pv->thread_data[t]->out;
1086 return out;
1089 static hb_buffer_t * nlmeans_filter_flush(hb_filter_private_t *pv)
1091 hb_buffer_t *out = NULL, *last = NULL;
1093 for (int f = 0; f < pv->next_frame; f++)
1095 Frame *frame = &pv->frame[f];
1096 hb_buffer_t *buf;
1097 buf = hb_frame_buffer_init(frame->fmt, frame->width, frame->height);
1099 NLMeansFunctions *functions = &pv->functions;
1101 for (int c = 0; c < 3; c++)
1103 if (pv->strength[c] == 0)
1105 nlmeans_deborder(&frame->plane[c], buf->plane[c].data,
1106 buf->plane[c].width, buf->plane[c].stride,
1107 buf->plane[c].height);
1108 continue;
1110 if (pv->prefilter[c] & NLMEANS_PREFILTER_MODE_PASSTHRU)
1112 nlmeans_prefilter(&pv->frame[f].plane[c], pv->prefilter[c]);
1113 nlmeans_deborder(&frame->plane[c], buf->plane[c].data,
1114 buf->plane[c].width, buf->plane[c].stride,
1115 buf->plane[c].height);
1116 continue;
1119 int nframes = pv->next_frame - f;
1120 if (pv->nframes[c] < nframes)
1122 nframes = pv->nframes[c];
1124 // Process current plane
1125 nlmeans_plane(functions,
1126 frame,
1127 pv->prefilter[c],
1129 nframes,
1130 buf->plane[c].data,
1131 buf->plane[c].width,
1132 buf->plane[c].stride,
1133 buf->plane[c].height,
1134 pv->strength[c],
1135 pv->origin_tune[c],
1136 pv->patch_size[c],
1137 pv->range[c],
1138 pv->exptable[c],
1139 pv->weight_fact_table[c],
1140 pv->diff_max[c]);
1142 buf->s = frame->s;
1143 if (out == NULL)
1145 out = last = buf;
1147 else
1149 last->next = buf;
1150 last = buf;
1153 return out;
1156 static int nlmeans_work(hb_filter_object_t *filter,
1157 hb_buffer_t **buf_in,
1158 hb_buffer_t **buf_out )
1160 hb_filter_private_t *pv = filter->private_data;
1161 hb_buffer_t *in = *buf_in;
1163 if (in->s.flags & HB_BUF_FLAG_EOF)
1165 hb_buffer_t *last;
1166 // Flush buffered frames
1167 last = *buf_out = nlmeans_filter_flush(pv);
1169 // And terminate the buffer list with a null buffer
1170 if (last != NULL)
1172 while (last->next != NULL)
1173 last = last->next;
1174 last->next = in;
1176 else
1178 *buf_out = in;
1180 *buf_in = NULL;
1181 return HB_FILTER_DONE;
1184 nlmeans_add_frame(pv, in);
1185 *buf_out = nlmeans_filter(pv);
1187 return HB_FILTER_OK;