3 Copyright (c) 2013 Dirk Farin
4 Copyright (c) 2003-2015 HandBrake Team
5 This file is part of the HandBrake source code
6 Homepage: <http://handbrake.fr/>.
7 It may be used under the terms of the GNU General Public License v2.
8 For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
14 * lumaY_strength : lumaY_origin_tune : lumaY_patch_size : lumaY_range : lumaY_frames : lumaY_prefilter :
15 * chromaB_strength : chromaB_origin_tune : chromaB_patch_size : chromaB_range : chromaB_frames : chromaB_prefilter :
16 * chromaR_strength : chromaR_origin_tune : chromaR_patch_size : chromaR_range : chromaR_frames : chromaR_prefilter
19 * 8:1:7:3:2:0 for each channel (equivalent to 8:1:7:3:2:0:8:1:7:3:2:0:8:1:7:3:2:0)
21 * Parameters cascade, e.g. 6:0.8:7:3:3:0:4:1 sets:
22 * strength 6, origin tune 0.8 for luma
23 * patch size 7, range 3, frames 3, prefilter 0 for all channels
24 * strength 4, origin tune 1 for both chroma channels
26 * Strength is relative and must be adjusted; ALL parameters affect overall strength.
27 * Lower origin tune improves results for noisier input or animation (film 0.5-1, animation 0.15-0.5).
28 * Large patch size (>9) may greatly reduce quality by clobbering detail.
29 * Larger search range increases quality; however, computation time increases exponentially.
30 * Large number of frames (film >3, animation >6) may cause temporal smearing.
31 * Prefiltering can potentially improve weight decisions, yielding better results for difficult sources.
33 * Prefilter enum combos:
36 * 3: Mean 5x5 (overrides Mean 3x3)
37 * 257: Mean 3x3 reduced by 25%
38 * 258: Mean 5x5 reduced by 25%
39 * 513: Mean 3x3 reduced by 50%
40 * 514: Mean 5x5 reduced by 50%
41 * 769: Mean 3x3 reduced by 75%
42 * 770: Mean 5x5 reduced by 75%
43 * 1025: Mean 3x3 plus edge boost (restores lost edge detail)
44 * 1026: Mean 5x5 plus edge boost
45 * 1281: Mean 3x3 reduced by 25% plus edge boost
47 * 2049: Mean 3x3 passthru (NLMeans off, prefilter is the output)
49 * 3329: Mean 3x3 reduced by 25% plus edge boost, passthru
58 #define NLMEANS_STRENGTH_LUMA_DEFAULT 6
59 #define NLMEANS_STRENGTH_CHROMA_DEFAULT 6
60 #define NLMEANS_ORIGIN_TUNE_LUMA_DEFAULT 1
61 #define NLMEANS_ORIGIN_TUNE_CHROMA_DEFAULT 1
62 #define NLMEANS_PATCH_SIZE_LUMA_DEFAULT 7
63 #define NLMEANS_PATCH_SIZE_CHROMA_DEFAULT 7
64 #define NLMEANS_RANGE_LUMA_DEFAULT 3
65 #define NLMEANS_RANGE_CHROMA_DEFAULT 3
66 #define NLMEANS_FRAMES_LUMA_DEFAULT 2
67 #define NLMEANS_FRAMES_CHROMA_DEFAULT 2
68 #define NLMEANS_PREFILTER_LUMA_DEFAULT 0
69 #define NLMEANS_PREFILTER_CHROMA_DEFAULT 0
71 #define NLMEANS_PREFILTER_MODE_MEAN3X3 1
72 #define NLMEANS_PREFILTER_MODE_MEAN5X5 2
73 #define NLMEANS_PREFILTER_MODE_MEDIAN3X3 4
74 #define NLMEANS_PREFILTER_MODE_MEDIAN5X5 8
75 #define NLMEANS_PREFILTER_MODE_RESERVED16 16 // Reserved
76 #define NLMEANS_PREFILTER_MODE_RESERVED32 32 // Reserved
77 #define NLMEANS_PREFILTER_MODE_RESERVED64 64 // Reserved
78 #define NLMEANS_PREFILTER_MODE_RESERVED128 128 // Reserved
79 #define NLMEANS_PREFILTER_MODE_REDUCE25 256
80 #define NLMEANS_PREFILTER_MODE_REDUCE50 512
81 #define NLMEANS_PREFILTER_MODE_EDGEBOOST 1024
82 #define NLMEANS_PREFILTER_MODE_PASSTHRU 2048
84 #define NLMEANS_SORT(a,b) { if (a > b) NLMEANS_SWAP(a, b); }
85 #define NLMEANS_SWAP(a,b) { a = (a ^ b); b = (a ^ b); a = (b ^ a); }
87 #define NLMEANS_FRAMES_MAX 32
88 #define NLMEANS_EXPSIZE 128
108 BorderedPlane plane
[3];
109 hb_buffer_settings_t s
;
120 hb_filter_private_t
*pv
;
123 } nlmeans_thread_arg_t
;
125 struct hb_filter_private_s
127 double strength
[3]; // averaging weight decay, larger produces smoother output
128 double origin_tune
[3]; // weight tuning for origin patch, 0.00..1.00
129 int patch_size
[3]; // pixel context region width (must be odd)
130 int range
[3]; // spatial search window width (must be odd)
131 int nframes
[3]; // temporal search depth in frames
132 int prefilter
[3]; // prefilter mode, can improve weight analysis
134 float exptable
[3][NLMEANS_EXPSIZE
];
135 float weight_fact_table
[3];
138 NLMeansFunctions functions
;
146 nlmeans_thread_arg_t
**thread_data
;
149 static int nlmeans_init(hb_filter_object_t
*filter
, hb_filter_init_t
*init
);
150 static int nlmeans_work(hb_filter_object_t
*filter
,
151 hb_buffer_t
**buf_in
,
152 hb_buffer_t
**buf_out
);
153 static void nlmeans_close(hb_filter_object_t
*filter
);
155 static void nlmeans_filter_thread(void *thread_args_v
);
157 hb_filter_object_t hb_filter_nlmeans
=
159 .id
= HB_FILTER_NLMEANS
,
161 .name
= "Denoise (nlmeans)",
163 .init
= nlmeans_init
,
164 .work
= nlmeans_work
,
165 .close
= nlmeans_close
,
168 static void nlmeans_border(uint8_t *src
,
173 const int bw
= w
+ 2 * border
;
174 uint8_t *image
= src
+ border
+ bw
* border
;
176 // Create faux borders using edge pixels
177 for (int y
= 0; y
< h
; y
++)
179 for (int x
= 0; x
< border
; x
++)
181 *(image
+ y
*bw
- x
- 1) = *(image
+ y
*bw
+ x
);
182 *(image
+ y
*bw
+ x
+ w
) = *(image
+ y
*bw
- x
+ (w
-1));
185 for (int y
= 0; y
< border
; y
++)
187 memcpy(image
- border
- (y
+1)*bw
, image
- border
+ y
*bw
, bw
);
188 memcpy(image
- border
+ (y
+h
)*bw
, image
- border
+ (h
-y
-1)*bw
, bw
);
193 static void nlmeans_deborder(const BorderedPlane
*src
,
199 const int bw
= src
->w
+ 2 * src
->border
;
200 uint8_t *image
= src
->mem
+ src
->border
+ bw
* src
->border
;
209 for (int y
= 0; y
< h
; y
++)
211 memcpy(dst
+ y
* s
, image
+ y
* bw
, width
);
216 static void nlmeans_alloc(const uint8_t *src
,
223 const int bw
= src_w
+ 2 * border
;
224 const int bh
= src_h
+ 2 * border
;
226 uint8_t *mem
= malloc(bw
* bh
* sizeof(uint8_t));
227 uint8_t *image
= mem
+ border
+ bw
* border
;
230 for (int y
= 0; y
< src_h
; y
++)
232 memcpy(image
+ y
* bw
, src
+ y
* src_s
, src_w
);
239 dst
->border
= border
;
241 nlmeans_border(dst
->mem
, dst
->w
, dst
->h
, dst
->border
);
242 dst
->mem_pre
= dst
->mem
;
243 dst
->image_pre
= dst
->image
;
247 static void nlmeans_filter_mean(const uint8_t *src
,
256 const int bw
= w
+ 2 * border
;
257 const int offset_min
= -((size
- 1) /2);
258 const int offset_max
= (size
+ 1) /2;
259 const double pixel_weight
= 1.0 / (size
* size
);
261 for (int y
= 0; y
< h
; y
++)
263 for (int x
= 0; x
< w
; x
++)
266 for (int k
= offset_min
; k
< offset_max
; k
++)
268 for (int j
= offset_min
; j
< offset_max
; j
++)
270 pixel_sum
= pixel_sum
+ *(src
+ bw
*(y
+j
) + (x
+k
));
273 *(dst
+ bw
*y
+ x
) = (uint8_t)(pixel_sum
* pixel_weight
);
279 static uint8_t nlmeans_filter_median_opt(uint8_t *pixels
,
283 // Optimized sorting networks
286 /* opt_med9() via Nicolas Devillard
287 * http://ndevilla.free.fr/median/median.pdf
289 NLMEANS_SORT(pixels
[1], pixels
[2]); NLMEANS_SORT(pixels
[4], pixels
[5]); NLMEANS_SORT(pixels
[7], pixels
[8]);
290 NLMEANS_SORT(pixels
[0], pixels
[1]); NLMEANS_SORT(pixels
[3], pixels
[4]); NLMEANS_SORT(pixels
[6], pixels
[7]);
291 NLMEANS_SORT(pixels
[1], pixels
[2]); NLMEANS_SORT(pixels
[4], pixels
[5]); NLMEANS_SORT(pixels
[7], pixels
[8]);
292 NLMEANS_SORT(pixels
[0], pixels
[3]); NLMEANS_SORT(pixels
[5], pixels
[8]); NLMEANS_SORT(pixels
[4], pixels
[7]);
293 NLMEANS_SORT(pixels
[3], pixels
[6]); NLMEANS_SORT(pixels
[1], pixels
[4]); NLMEANS_SORT(pixels
[2], pixels
[5]);
294 NLMEANS_SORT(pixels
[4], pixels
[7]); NLMEANS_SORT(pixels
[4], pixels
[2]); NLMEANS_SORT(pixels
[6], pixels
[4]);
295 NLMEANS_SORT(pixels
[4], pixels
[2]);
300 /* opt_med25() via Nicolas Devillard
301 * http://ndevilla.free.fr/median/median.pdf
303 NLMEANS_SORT(pixels
[0], pixels
[1]); NLMEANS_SORT(pixels
[3], pixels
[4]); NLMEANS_SORT(pixels
[2], pixels
[4]);
304 NLMEANS_SORT(pixels
[2], pixels
[3]); NLMEANS_SORT(pixels
[6], pixels
[7]); NLMEANS_SORT(pixels
[5], pixels
[7]);
305 NLMEANS_SORT(pixels
[5], pixels
[6]); NLMEANS_SORT(pixels
[9], pixels
[10]); NLMEANS_SORT(pixels
[8], pixels
[10]);
306 NLMEANS_SORT(pixels
[8], pixels
[9]); NLMEANS_SORT(pixels
[12], pixels
[13]); NLMEANS_SORT(pixels
[11], pixels
[13]);
307 NLMEANS_SORT(pixels
[11], pixels
[12]); NLMEANS_SORT(pixels
[15], pixels
[16]); NLMEANS_SORT(pixels
[14], pixels
[16]);
308 NLMEANS_SORT(pixels
[14], pixels
[15]); NLMEANS_SORT(pixels
[18], pixels
[19]); NLMEANS_SORT(pixels
[17], pixels
[19]);
309 NLMEANS_SORT(pixels
[17], pixels
[18]); NLMEANS_SORT(pixels
[21], pixels
[22]); NLMEANS_SORT(pixels
[20], pixels
[22]);
310 NLMEANS_SORT(pixels
[20], pixels
[21]); NLMEANS_SORT(pixels
[23], pixels
[24]); NLMEANS_SORT(pixels
[2], pixels
[5]);
311 NLMEANS_SORT(pixels
[3], pixels
[6]); NLMEANS_SORT(pixels
[0], pixels
[6]); NLMEANS_SORT(pixels
[0], pixels
[3]);
312 NLMEANS_SORT(pixels
[4], pixels
[7]); NLMEANS_SORT(pixels
[1], pixels
[7]); NLMEANS_SORT(pixels
[1], pixels
[4]);
313 NLMEANS_SORT(pixels
[11], pixels
[14]); NLMEANS_SORT(pixels
[8], pixels
[14]); NLMEANS_SORT(pixels
[8], pixels
[11]);
314 NLMEANS_SORT(pixels
[12], pixels
[15]); NLMEANS_SORT(pixels
[9], pixels
[15]); NLMEANS_SORT(pixels
[9], pixels
[12]);
315 NLMEANS_SORT(pixels
[13], pixels
[16]); NLMEANS_SORT(pixels
[10], pixels
[16]); NLMEANS_SORT(pixels
[10], pixels
[13]);
316 NLMEANS_SORT(pixels
[20], pixels
[23]); NLMEANS_SORT(pixels
[17], pixels
[23]); NLMEANS_SORT(pixels
[17], pixels
[20]);
317 NLMEANS_SORT(pixels
[21], pixels
[24]); NLMEANS_SORT(pixels
[18], pixels
[24]); NLMEANS_SORT(pixels
[18], pixels
[21]);
318 NLMEANS_SORT(pixels
[19], pixels
[22]); NLMEANS_SORT(pixels
[8], pixels
[17]); NLMEANS_SORT(pixels
[9], pixels
[18]);
319 NLMEANS_SORT(pixels
[0], pixels
[18]); NLMEANS_SORT(pixels
[0], pixels
[9]); NLMEANS_SORT(pixels
[10], pixels
[19]);
320 NLMEANS_SORT(pixels
[1], pixels
[19]); NLMEANS_SORT(pixels
[1], pixels
[10]); NLMEANS_SORT(pixels
[11], pixels
[20]);
321 NLMEANS_SORT(pixels
[2], pixels
[20]); NLMEANS_SORT(pixels
[2], pixels
[11]); NLMEANS_SORT(pixels
[12], pixels
[21]);
322 NLMEANS_SORT(pixels
[3], pixels
[21]); NLMEANS_SORT(pixels
[3], pixels
[12]); NLMEANS_SORT(pixels
[13], pixels
[22]);
323 NLMEANS_SORT(pixels
[4], pixels
[22]); NLMEANS_SORT(pixels
[4], pixels
[13]); NLMEANS_SORT(pixels
[14], pixels
[23]);
324 NLMEANS_SORT(pixels
[5], pixels
[23]); NLMEANS_SORT(pixels
[5], pixels
[14]); NLMEANS_SORT(pixels
[15], pixels
[24]);
325 NLMEANS_SORT(pixels
[6], pixels
[24]); NLMEANS_SORT(pixels
[6], pixels
[15]); NLMEANS_SORT(pixels
[7], pixels
[16]);
326 NLMEANS_SORT(pixels
[7], pixels
[19]); NLMEANS_SORT(pixels
[13], pixels
[21]); NLMEANS_SORT(pixels
[15], pixels
[23]);
327 NLMEANS_SORT(pixels
[7], pixels
[13]); NLMEANS_SORT(pixels
[7], pixels
[15]); NLMEANS_SORT(pixels
[1], pixels
[9]);
328 NLMEANS_SORT(pixels
[3], pixels
[11]); NLMEANS_SORT(pixels
[5], pixels
[17]); NLMEANS_SORT(pixels
[11], pixels
[17]);
329 NLMEANS_SORT(pixels
[9], pixels
[17]); NLMEANS_SORT(pixels
[4], pixels
[10]); NLMEANS_SORT(pixels
[6], pixels
[12]);
330 NLMEANS_SORT(pixels
[7], pixels
[14]); NLMEANS_SORT(pixels
[4], pixels
[6]); NLMEANS_SORT(pixels
[4], pixels
[7]);
331 NLMEANS_SORT(pixels
[12], pixels
[14]); NLMEANS_SORT(pixels
[10], pixels
[14]); NLMEANS_SORT(pixels
[6], pixels
[7]);
332 NLMEANS_SORT(pixels
[10], pixels
[12]); NLMEANS_SORT(pixels
[6], pixels
[10]); NLMEANS_SORT(pixels
[6], pixels
[17]);
333 NLMEANS_SORT(pixels
[12], pixels
[17]); NLMEANS_SORT(pixels
[7], pixels
[17]); NLMEANS_SORT(pixels
[7], pixels
[10]);
334 NLMEANS_SORT(pixels
[12], pixels
[18]); NLMEANS_SORT(pixels
[7], pixels
[12]); NLMEANS_SORT(pixels
[10], pixels
[18]);
335 NLMEANS_SORT(pixels
[12], pixels
[20]); NLMEANS_SORT(pixels
[10], pixels
[20]); NLMEANS_SORT(pixels
[10], pixels
[12]);
339 // Network for size not implemented
340 return pixels
[(int)((size
* size
)/2)];
344 static void nlmeans_filter_median(const uint8_t *src
,
352 const int bw
= w
+ 2 * border
;
353 const int offset_min
= -((size
- 1) /2);
354 const int offset_max
= (size
+ 1) /2;
356 uint8_t pixels
[size
* size
];
357 for (int y
= 0; y
< h
; y
++)
359 for (int x
= 0; x
< w
; x
++)
362 for (int k
= offset_min
; k
< offset_max
; k
++)
364 for (int j
= offset_min
; j
< offset_max
; j
++)
366 pixels
[index
] = *(src
+ bw
*(y
+j
) + (x
+k
));
370 *(dst
+ bw
*y
+ x
) = nlmeans_filter_median_opt(pixels
, size
);
376 static void nlmeans_filter_edgeboost(const uint8_t *src
,
382 const int bw
= w
+ 2 * border
;
383 const int bh
= h
+ 2 * border
;
386 const int kernel_size
= 3;
387 const int kernel
[3][3] = {{-31, 0, 31},
390 const double kernel_coef
= 1.0 / 126.42;
393 const int offset_min
= -((kernel_size
- 1) /2);
394 const int offset_max
= (kernel_size
+ 1) /2;
397 uint8_t *mask_mem
= calloc(bw
* bh
, sizeof(uint8_t));
398 uint8_t *mask
= mask_mem
+ border
+ bw
* border
;
399 for (int y
= 0; y
< h
; y
++)
401 for (int x
= 0; x
< w
; x
++)
405 for (int k
= offset_min
; k
< offset_max
; k
++)
407 for (int j
= offset_min
; j
< offset_max
; j
++)
409 pixel1
+= kernel
[j
+1][k
+1] * *(src
+ bw
*(y
+j
) + (x
+k
));
410 pixel2
+= kernel
[k
+1][j
+1] * *(src
+ bw
*(y
+j
) + (x
+k
));
413 pixel1
= pixel1
> 0 ? pixel1
: -pixel1
;
414 pixel2
= pixel2
> 0 ? pixel2
: -pixel2
;
415 pixel1
= (uint16_t)(((double)pixel1
* kernel_coef
) + 128);
416 pixel2
= (uint16_t)(((double)pixel2
* kernel_coef
) + 128);
417 *(mask
+ bw
*y
+ x
) = (uint8_t)(pixel1
+ pixel2
);
418 if (*(mask
+ bw
*y
+ x
) > 160)
420 *(mask
+ bw
*y
+ x
) = 235;
422 else if (*(mask
+ bw
*y
+ x
) > 16)
424 *(mask
+ bw
*y
+ x
) = 128;
428 *(mask
+ bw
*y
+ x
) = 16;
433 // Post-process and output
435 for (int y
= 0; y
< h
; y
++)
437 for (int x
= 0; x
< w
; x
++)
439 if (*(mask
+ bw
*y
+ x
) > 16)
441 // Count nearby edge pixels
443 for (int k
= offset_min
; k
< offset_max
; k
++)
445 for (int j
= offset_min
; j
< offset_max
; j
++)
447 if (*(mask
+ bw
*(y
+j
) + (x
+k
)) > 16)
453 // Remove false positive
456 *(mask
+ bw
*y
+ x
) = 16;
459 if (*(mask
+ bw
*y
+ x
) > 16)
461 if (*(mask
+ bw
*y
+ x
) == 235)
463 *(dst
+ bw
*y
+ x
) = (3 * *(src
+ bw
*y
+ x
) + 1 * *(dst
+ bw
*y
+ x
)) /4;
467 *(dst
+ bw
*y
+ x
) = (2 * *(src
+ bw
*y
+ x
) + 3 * *(dst
+ bw
*y
+ x
)) /5;
469 //*(dst + bw*y + x) = *(mask + bw*y + x); // Overlay mask
472 //*(dst + bw*y + x) = *(mask + bw*y + x); // Full mask
479 static void nlmeans_prefilter(BorderedPlane
*src
,
480 const int filter_type
)
483 if (src
->prefiltered
)
485 hb_unlock(src
->mutex
);
489 if (filter_type
& NLMEANS_PREFILTER_MODE_MEAN3X3
||
490 filter_type
& NLMEANS_PREFILTER_MODE_MEAN5X5
||
491 filter_type
& NLMEANS_PREFILTER_MODE_MEDIAN3X3
||
492 filter_type
& NLMEANS_PREFILTER_MODE_MEDIAN5X5
)
496 const uint8_t *mem
= src
->mem
;
497 const uint8_t *image
= src
->image
;
498 const int border
= src
->border
;
499 const int w
= src
->w
;
500 const int h
= src
->h
;
501 const int bw
= w
+ 2 * border
;
502 const int bh
= h
+ 2 * border
;
505 uint8_t *mem_pre
= malloc(bw
* bh
* sizeof(uint8_t));
506 uint8_t *image_pre
= mem_pre
+ border
+ bw
* border
;
507 for (int y
= 0; y
< h
; y
++)
509 memcpy(mem_pre
+ y
* bw
, mem
+ y
* bw
, bw
);
512 // Filter plane; should already have at least 2px extra border on each side
513 if (filter_type
& NLMEANS_PREFILTER_MODE_MEDIAN5X5
)
516 nlmeans_filter_median(image
, image_pre
, w
, h
, border
, 5);
518 else if (filter_type
& NLMEANS_PREFILTER_MODE_MEDIAN3X3
)
521 nlmeans_filter_median(image
, image_pre
, w
, h
, border
, 3);
523 else if (filter_type
& NLMEANS_PREFILTER_MODE_MEAN5X5
)
526 nlmeans_filter_mean(image
, image_pre
, w
, h
, border
, 5);
528 else if (filter_type
& NLMEANS_PREFILTER_MODE_MEAN3X3
)
531 nlmeans_filter_mean(image
, image_pre
, w
, h
, border
, 3);
535 if (filter_type
& NLMEANS_PREFILTER_MODE_EDGEBOOST
)
537 nlmeans_filter_edgeboost(image
, image_pre
, w
, h
, border
);
540 // Blend source and destination for lesser effect
543 if (filter_type
& NLMEANS_PREFILTER_MODE_REDUCE50
&&
544 filter_type
& NLMEANS_PREFILTER_MODE_REDUCE25
)
549 else if (filter_type
& NLMEANS_PREFILTER_MODE_REDUCE50
)
554 else if (filter_type
& NLMEANS_PREFILTER_MODE_REDUCE25
)
561 for (int y
= 0; y
< bh
; y
++)
563 for (int x
= 0; x
< bw
; x
++)
565 *(mem_pre
+ bw
*y
+ x
) = (uint8_t)((wet
* *(mem_pre
+ bw
*y
+ x
) + dry
* *(mem
+ bw
*y
+ x
)) / (wet
+ dry
));
571 src
->mem_pre
= mem_pre
;
572 src
->image_pre
= image_pre
;
575 nlmeans_border(mem_pre
, w
, h
, border
);
578 src
->prefiltered
= 1;
579 hb_unlock(src
->mutex
);
582 static void build_integral_scalar(uint32_t *integral
,
585 const uint8_t *src_pre
,
586 const uint8_t *compare
,
587 const uint8_t *compare_pre
,
595 const int bw
= w
+ 2 * border
;
596 for (int y
= 0; y
< dst_h
; y
++)
598 const uint8_t *p1
= src_pre
+ y
*bw
;
599 const uint8_t *p2
= compare_pre
+ (y
+dy
)*bw
+ dx
;
600 uint32_t *out
= integral
+ (y
*integral_stride
);
602 for (int x
= 0; x
< dst_w
; x
++)
604 int diff
= *p1
- *p2
;
605 *out
= *(out
-1) + diff
* diff
;
613 out
= integral
+ y
*integral_stride
;
615 for (int x
= 0; x
< dst_w
; x
++)
617 *out
+= *(out
- integral_stride
);
624 static void nlmeans_plane(NLMeansFunctions
*functions
,
637 const float *exptable
,
638 const float weight_fact_table
,
641 const int n_half
= (n
-1) /2;
642 const int r_half
= (r
-1) /2;
645 const uint8_t *src
= frame
[0].plane
[plane
].image
;
646 const uint8_t *src_pre
= frame
[0].plane
[plane
].image_pre
;
647 const int w
= frame
[0].plane
[plane
].w
;
648 const int border
= frame
[0].plane
[plane
].border
;
649 const int bw
= w
+ 2 * border
;
651 // Allocate temporary pixel sums
652 struct PixelSum
*tmp_data
= calloc(dst_w
* dst_h
, sizeof(struct PixelSum
));
654 // Allocate integral image
655 const int integral_stride
= ((dst_w
+ 15) / 16 * 16) + 2 * 16;
656 uint32_t* const integral_mem
= calloc(integral_stride
* (dst_h
+1), sizeof(uint32_t));
657 uint32_t* const integral
= integral_mem
+ integral_stride
+ 16;
659 // Iterate through available frames
660 for (int f
= 0; f
< nframes
; f
++)
662 nlmeans_prefilter(&frame
[f
].plane
[plane
], prefilter
);
665 const uint8_t *compare
= frame
[f
].plane
[plane
].image
;
666 const uint8_t *compare_pre
= frame
[f
].plane
[plane
].image_pre
;
668 // Iterate through all displacements
669 for (int dy
= -r_half
; dy
<= r_half
; dy
++)
671 for (int dx
= -r_half
; dx
<= r_half
; dx
++)
674 // Apply special weight tuning to origin patch
675 if (dx
== 0 && dy
== 0 && f
== 0)
677 // TODO: Parallelize this
678 for (int y
= n_half
; y
< dst_h
-n
+ n_half
; y
++)
680 for (int x
= n_half
; x
< dst_w
-n
+ n_half
; x
++)
682 tmp_data
[y
*dst_w
+ x
].weight_sum
+= origin_tune
;
683 tmp_data
[y
*dst_w
+ x
].pixel_sum
+= origin_tune
* src
[y
*bw
+ x
];
690 functions
->build_integral(integral
,
703 // Average displacement
704 // TODO: Parallelize this
705 for (int y
= 0; y
<= dst_h
-n
; y
++)
707 const uint32_t *integral_ptr1
= integral
+ (y
-1)*integral_stride
- 1;
708 const uint32_t *integral_ptr2
= integral
+ (y
+n
-1)*integral_stride
- 1;
710 for (int x
= 0; x
<= dst_w
-n
; x
++)
712 const int xc
= x
+ n_half
;
713 const int yc
= y
+ n_half
;
715 // Difference between patches
716 const int diff
= (uint32_t)(integral_ptr2
[n
] - integral_ptr2
[0] - integral_ptr1
[n
] + integral_ptr1
[0]);
718 // Sum pixel with weight
721 const int diffidx
= diff
* weight_fact_table
;
723 //float weight = exp(-diff*weightFact);
724 const float weight
= exptable
[diffidx
];
726 tmp_data
[yc
*dst_w
+ xc
].weight_sum
+= weight
;
727 tmp_data
[yc
*dst_w
+ xc
].pixel_sum
+= weight
* compare
[(yc
+dy
)*bw
+ xc
+ dx
];
739 for (int y
= 0; y
< dst_h
; y
++)
741 for (int x
= 0; x
< n_half
; x
++)
743 *(dst
+ y
* dst_s
+ x
) = *(src
+ y
* bw
- x
- 1);
744 *(dst
+ y
* dst_s
- x
+ (dst_w
- 1)) = *(src
+ y
* bw
+ x
+ dst_w
);
747 for (int y
= 0; y
< n_half
; y
++)
749 memcpy(dst
+ y
*dst_s
, src
- (y
+1)*bw
, dst_w
);
750 memcpy(dst
+ (dst_h
-y
-1)*dst_s
, src
+ (y
+dst_h
)*bw
, dst_w
);
755 for (int y
= n_half
; y
< dst_h
-n_half
; y
++)
757 for (int x
= n_half
; x
< dst_w
-n_half
; x
++)
759 result
= (uint8_t)(tmp_data
[y
*dst_w
+ x
].pixel_sum
/ tmp_data
[y
*dst_w
+ x
].weight_sum
);
760 *(dst
+ y
*dst_s
+ x
) = result
? result
: *(src
+ y
*bw
+ x
);
769 static int nlmeans_init(hb_filter_object_t
*filter
,
770 hb_filter_init_t
*init
)
772 filter
->private_data
= calloc(sizeof(struct hb_filter_private_s
), 1);
773 hb_filter_private_t
*pv
= filter
->private_data
;
774 NLMeansFunctions
*functions
= &pv
->functions
;
776 functions
->build_integral
= build_integral_scalar
;
777 #if defined(ARCH_X86)
778 nlmeans_init_x86(functions
);
781 // Mark parameters unset
782 for (int c
= 0; c
< 3; c
++)
784 pv
->strength
[c
] = -1;
785 pv
->origin_tune
[c
] = -1;
786 pv
->patch_size
[c
] = -1;
789 pv
->prefilter
[c
] = -1;
792 // Read user parameters
793 if (filter
->settings
!= NULL
)
795 sscanf(filter
->settings
, "%lf:%lf:%d:%d:%d:%d:%lf:%lf:%d:%d:%d:%d:%lf:%lf:%d:%d:%d:%d",
796 &pv
->strength
[0], &pv
->origin_tune
[0], &pv
->patch_size
[0], &pv
->range
[0], &pv
->nframes
[0], &pv
->prefilter
[0],
797 &pv
->strength
[1], &pv
->origin_tune
[1], &pv
->patch_size
[1], &pv
->range
[1], &pv
->nframes
[1], &pv
->prefilter
[1],
798 &pv
->strength
[2], &pv
->origin_tune
[2], &pv
->patch_size
[2], &pv
->range
[2], &pv
->nframes
[2], &pv
->prefilter
[2]);
802 // Cr not set; inherit Cb. Cb not set; inherit Y. Y not set; defaults.
803 for (int c
= 1; c
< 3; c
++)
805 if (pv
->strength
[c
] == -1) { pv
->strength
[c
] = pv
->strength
[c
-1]; }
806 if (pv
->origin_tune
[c
] == -1) { pv
->origin_tune
[c
] = pv
->origin_tune
[c
-1]; }
807 if (pv
->patch_size
[c
] == -1) { pv
->patch_size
[c
] = pv
->patch_size
[c
-1]; }
808 if (pv
->range
[c
] == -1) { pv
->range
[c
] = pv
->range
[c
-1]; }
809 if (pv
->nframes
[c
] == -1) { pv
->nframes
[c
] = pv
->nframes
[c
-1]; }
810 if (pv
->prefilter
[c
] == -1) { pv
->prefilter
[c
] = pv
->prefilter
[c
-1]; }
813 for (int c
= 0; c
< 3; c
++)
815 // Replace unset values with defaults
816 if (pv
->strength
[c
] == -1) { pv
->strength
[c
] = c
? NLMEANS_STRENGTH_LUMA_DEFAULT
: NLMEANS_STRENGTH_CHROMA_DEFAULT
; }
817 if (pv
->origin_tune
[c
] == -1) { pv
->origin_tune
[c
] = c
? NLMEANS_ORIGIN_TUNE_LUMA_DEFAULT
: NLMEANS_ORIGIN_TUNE_CHROMA_DEFAULT
; }
818 if (pv
->patch_size
[c
] == -1) { pv
->patch_size
[c
] = c
? NLMEANS_PATCH_SIZE_LUMA_DEFAULT
: NLMEANS_PATCH_SIZE_CHROMA_DEFAULT
; }
819 if (pv
->range
[c
] == -1) { pv
->range
[c
] = c
? NLMEANS_RANGE_LUMA_DEFAULT
: NLMEANS_RANGE_CHROMA_DEFAULT
; }
820 if (pv
->nframes
[c
] == -1) { pv
->nframes
[c
] = c
? NLMEANS_FRAMES_LUMA_DEFAULT
: NLMEANS_FRAMES_CHROMA_DEFAULT
; }
821 if (pv
->prefilter
[c
] == -1) { pv
->prefilter
[c
] = c
? NLMEANS_PREFILTER_LUMA_DEFAULT
: NLMEANS_PREFILTER_CHROMA_DEFAULT
; }
824 if (pv
->strength
[c
] < 0) { pv
->strength
[c
] = 0; }
825 if (pv
->origin_tune
[c
] < 0.01) { pv
->origin_tune
[c
] = 0.01; } // avoid black artifacts
826 if (pv
->origin_tune
[c
] > 1) { pv
->origin_tune
[c
] = 1; }
827 if (pv
->patch_size
[c
] % 2 == 0) { pv
->patch_size
[c
]--; }
828 if (pv
->patch_size
[c
] < 1) { pv
->patch_size
[c
] = 1; }
829 if (pv
->range
[c
] % 2 == 0) { pv
->range
[c
]--; }
830 if (pv
->range
[c
] < 1) { pv
->range
[c
] = 1; }
831 if (pv
->nframes
[c
] < 1) { pv
->nframes
[c
] = 1; }
832 if (pv
->nframes
[c
] > NLMEANS_FRAMES_MAX
) { pv
->nframes
[c
] = NLMEANS_FRAMES_MAX
; }
833 if (pv
->prefilter
[c
] < 0) { pv
->prefilter
[c
] = 0; }
835 if (pv
->max_frames
< pv
->nframes
[c
]) pv
->max_frames
= pv
->nframes
[c
];
837 // Precompute exponential table
838 float *exptable
= &pv
->exptable
[c
][0];
839 float *weight_fact_table
= &pv
->weight_fact_table
[c
];
840 int *diff_max
= &pv
->diff_max
[c
];
841 const float weight_factor
= 1.0/pv
->patch_size
[c
]/pv
->patch_size
[c
] / (pv
->strength
[c
] * pv
->strength
[c
]);
842 const float min_weight_in_table
= 0.0005;
843 const float stretch
= NLMEANS_EXPSIZE
/ (-log(min_weight_in_table
));
844 *(weight_fact_table
) = weight_factor
* stretch
;
845 *(diff_max
) = NLMEANS_EXPSIZE
/ *(weight_fact_table
);
846 for (int i
= 0; i
< NLMEANS_EXPSIZE
; i
++)
848 exptable
[i
] = exp(-i
/stretch
);
850 exptable
[NLMEANS_EXPSIZE
-1] = 0;
853 pv
->thread_count
= hb_get_cpu_count();
854 pv
->frame
= calloc(pv
->thread_count
+ pv
->max_frames
, sizeof(Frame
));
855 for (int ii
= 0; ii
< pv
->thread_count
+ pv
->max_frames
; ii
++)
857 for (int c
= 0; c
< 3; c
++)
859 pv
->frame
[ii
].plane
[c
].mutex
= hb_lock_init();
863 pv
->thread_data
= malloc(pv
->thread_count
* sizeof(nlmeans_thread_arg_t
*));
864 if (taskset_init(&pv
->taskset
, pv
->thread_count
,
865 sizeof(nlmeans_thread_arg_t
)) == 0)
867 hb_error("NLMeans could not initialize taskset");
871 for (int ii
= 0; ii
< pv
->thread_count
; ii
++)
873 pv
->thread_data
[ii
] = taskset_thread_args(&pv
->taskset
, ii
);
874 if (pv
->thread_data
[ii
] == NULL
)
876 hb_error("NLMeans could not create thread args");
879 pv
->thread_data
[ii
]->pv
= pv
;
880 pv
->thread_data
[ii
]->segment
= ii
;
881 if (taskset_thread_spawn(&pv
->taskset
, ii
, "nlmeans_filter",
882 nlmeans_filter_thread
, HB_NORMAL_PRIORITY
) == 0)
884 hb_error("NLMeans could not spawn thread");
892 taskset_fini(&pv
->taskset
);
893 free(pv
->thread_data
);
898 static void nlmeans_close(hb_filter_object_t
*filter
)
900 hb_filter_private_t
*pv
= filter
->private_data
;
907 taskset_fini(&pv
->taskset
);
908 for (int c
= 0; c
< 3; c
++)
910 for (int f
= 0; f
< pv
->nframes
[c
]; f
++)
912 if (pv
->frame
[f
].plane
[c
].mem_pre
!= NULL
&&
913 pv
->frame
[f
].plane
[c
].mem_pre
!= pv
->frame
[f
].plane
[c
].mem
)
915 free(pv
->frame
[f
].plane
[c
].mem_pre
);
916 pv
->frame
[f
].plane
[c
].mem_pre
= NULL
;
918 if (pv
->frame
[f
].plane
[c
].mem
!= NULL
)
920 free(pv
->frame
[f
].plane
[c
].mem
);
921 pv
->frame
[f
].plane
[c
].mem
= NULL
;
926 for (int ii
= 0; ii
< pv
->thread_count
+ pv
->max_frames
; ii
++)
928 for (int c
= 0; c
< 3; c
++)
930 hb_lock_close(&pv
->frame
[ii
].plane
[c
].mutex
);
935 free(pv
->thread_data
);
937 filter
->private_data
= NULL
;
940 static void nlmeans_filter_thread(void *thread_args_v
)
942 nlmeans_thread_arg_t
*thread_data
= thread_args_v
;
943 hb_filter_private_t
*pv
= thread_data
->pv
;
944 int segment
= thread_data
->segment
;
946 hb_log("NLMeans thread started for segment %d", segment
);
950 // Wait until there is work to do.
951 taskset_thread_wait4start(&pv
->taskset
, segment
);
953 if (taskset_thread_stop(&pv
->taskset
, segment
))
958 Frame
*frame
= &pv
->frame
[segment
];
960 buf
= hb_frame_buffer_init(frame
->fmt
, frame
->width
, frame
->height
);
962 NLMeansFunctions
*functions
= &pv
->functions
;
964 for (int c
= 0; c
< 3; c
++)
966 if (pv
->strength
[c
] == 0)
968 nlmeans_deborder(&frame
->plane
[c
], buf
->plane
[c
].data
,
969 buf
->plane
[c
].width
, buf
->plane
[c
].stride
,
970 buf
->plane
[c
].height
);
973 if (pv
->prefilter
[c
] & NLMEANS_PREFILTER_MODE_PASSTHRU
)
975 nlmeans_prefilter(&pv
->frame
->plane
[c
], pv
->prefilter
[c
]);
976 nlmeans_deborder(&frame
->plane
[c
], buf
->plane
[c
].data
,
977 buf
->plane
[c
].width
, buf
->plane
[c
].stride
,
978 buf
->plane
[c
].height
);
982 // Process current plane
983 nlmeans_plane(functions
,
990 buf
->plane
[c
].stride
,
991 buf
->plane
[c
].height
,
997 pv
->weight_fact_table
[c
],
1000 buf
->s
= pv
->frame
[segment
].s
;
1001 thread_data
->out
= buf
;
1003 // Finished this segment, notify.
1004 taskset_thread_complete(&pv
->taskset
, segment
);
1006 taskset_thread_complete(&pv
->taskset
, segment
);
1009 static void nlmeans_add_frame(hb_filter_private_t
*pv
, hb_buffer_t
*buf
)
1011 for (int c
= 0; c
< 3; c
++)
1013 // Extend copy of plane with extra border and place in buffer
1014 const int border
= ((pv
->range
[c
] + 2) / 2 + 15) / 16 * 16;
1015 nlmeans_alloc(buf
->plane
[c
].data
,
1016 buf
->plane
[c
].width
,
1017 buf
->plane
[c
].stride
,
1018 buf
->plane
[c
].height
,
1019 &pv
->frame
[pv
->next_frame
].plane
[c
],
1021 pv
->frame
[pv
->next_frame
].s
= buf
->s
;
1022 pv
->frame
[pv
->next_frame
].width
= buf
->f
.width
;
1023 pv
->frame
[pv
->next_frame
].height
= buf
->f
.height
;
1024 pv
->frame
[pv
->next_frame
].fmt
= buf
->f
.fmt
;
1029 static hb_buffer_t
* nlmeans_filter(hb_filter_private_t
*pv
)
1031 if (pv
->next_frame
< pv
->max_frames
+ pv
->thread_count
)
1036 taskset_cycle(&pv
->taskset
);
1038 // Free buffers that are not needed for next taskset cycle
1039 for (int c
= 0; c
< 3; c
++)
1041 for (int t
= 0; t
< pv
->thread_count
; t
++)
1043 // Release last frame in buffer
1044 if (pv
->frame
[t
].plane
[c
].mem_pre
!= NULL
&&
1045 pv
->frame
[t
].plane
[c
].mem_pre
!= pv
->frame
[t
].plane
[c
].mem
)
1047 free(pv
->frame
[t
].plane
[c
].mem_pre
);
1048 pv
->frame
[t
].plane
[c
].mem_pre
= NULL
;
1050 if (pv
->frame
[t
].plane
[c
].mem
!= NULL
)
1052 free(pv
->frame
[t
].plane
[c
].mem
);
1053 pv
->frame
[t
].plane
[c
].mem
= NULL
;
1057 // Shift frames in buffer down
1058 for (int f
= 0; f
< pv
->max_frames
; f
++)
1060 // Don't move the mutex!
1061 Frame frame
= pv
->frame
[f
];
1062 pv
->frame
[f
] = pv
->frame
[f
+pv
->thread_count
];
1063 for (int c
= 0; c
< 3; c
++)
1065 pv
->frame
[f
].plane
[c
].mutex
= frame
.plane
[c
].mutex
;
1066 pv
->frame
[f
+pv
->thread_count
].plane
[c
].mem_pre
= NULL
;
1067 pv
->frame
[f
+pv
->thread_count
].plane
[c
].mem
= NULL
;
1070 pv
->next_frame
-= pv
->thread_count
;
1072 // Collect results from taskset
1073 hb_buffer_t
*last
= NULL
, *out
= NULL
;
1074 for (int t
= 0; t
< pv
->thread_count
; t
++)
1078 out
= last
= pv
->thread_data
[t
]->out
;
1082 last
->next
= pv
->thread_data
[t
]->out
;
1083 last
= pv
->thread_data
[t
]->out
;
1089 static hb_buffer_t
* nlmeans_filter_flush(hb_filter_private_t
*pv
)
1091 hb_buffer_t
*out
= NULL
, *last
= NULL
;
1093 for (int f
= 0; f
< pv
->next_frame
; f
++)
1095 Frame
*frame
= &pv
->frame
[f
];
1097 buf
= hb_frame_buffer_init(frame
->fmt
, frame
->width
, frame
->height
);
1099 NLMeansFunctions
*functions
= &pv
->functions
;
1101 for (int c
= 0; c
< 3; c
++)
1103 if (pv
->strength
[c
] == 0)
1105 nlmeans_deborder(&frame
->plane
[c
], buf
->plane
[c
].data
,
1106 buf
->plane
[c
].width
, buf
->plane
[c
].stride
,
1107 buf
->plane
[c
].height
);
1110 if (pv
->prefilter
[c
] & NLMEANS_PREFILTER_MODE_PASSTHRU
)
1112 nlmeans_prefilter(&pv
->frame
[f
].plane
[c
], pv
->prefilter
[c
]);
1113 nlmeans_deborder(&frame
->plane
[c
], buf
->plane
[c
].data
,
1114 buf
->plane
[c
].width
, buf
->plane
[c
].stride
,
1115 buf
->plane
[c
].height
);
1119 int nframes
= pv
->next_frame
- f
;
1120 if (pv
->nframes
[c
] < nframes
)
1122 nframes
= pv
->nframes
[c
];
1124 // Process current plane
1125 nlmeans_plane(functions
,
1131 buf
->plane
[c
].width
,
1132 buf
->plane
[c
].stride
,
1133 buf
->plane
[c
].height
,
1139 pv
->weight_fact_table
[c
],
1156 static int nlmeans_work(hb_filter_object_t
*filter
,
1157 hb_buffer_t
**buf_in
,
1158 hb_buffer_t
**buf_out
)
1160 hb_filter_private_t
*pv
= filter
->private_data
;
1161 hb_buffer_t
*in
= *buf_in
;
1163 if (in
->s
.flags
& HB_BUF_FLAG_EOF
)
1166 // Flush buffered frames
1167 last
= *buf_out
= nlmeans_filter_flush(pv
);
1169 // And terminate the buffer list with a null buffer
1172 while (last
->next
!= NULL
)
1181 return HB_FILTER_DONE
;
1184 nlmeans_add_frame(pv
, in
);
1185 *buf_out
= nlmeans_filter(pv
);
1187 return HB_FILTER_OK
;