1 /*****************************************************************************
2 * algo_phosphor.c : Phosphor algorithm for the VLC deinterlacer
3 *****************************************************************************
4 * Copyright (C) 2011 VLC authors and VideoLAN
7 * Author: Juha Jeronen <juha.jeronen@jyu.fi>
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU Lesser General Public License as published by
11 * the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
28 #ifdef CAN_COMPILE_MMXEXT
30 # include <stdalign.h>
36 #include <vlc_common.h>
38 #include <vlc_picture.h>
39 #include <vlc_filter.h>
41 #include "deinterlace.h" /* filter_sys_t */
42 #include "helpers.h" /* ComposeFrame() */
44 #include "algo_phosphor.h"
46 /*****************************************************************************
48 *****************************************************************************/
51 * Internal helper function: dims (darkens) the given field
52 * of the given picture.
54 * This is used for simulating CRT light output decay in RenderPhosphor().
56 * The strength "1" is recommended. It's a matter of taste,
57 * so it's parametrized.
59 * Note on chroma formats:
60 * - If input is 4:2:2, all planes are processed.
61 * - If input is 4:2:0, only the luma plane is processed, because both fields
62 * have the same chroma. This will distort colours, especially for high
63 * filter strengths, especially for pixels whose U and/or V values are
64 * far away from the origin (which is at 128 in uint8 format).
66 * @param p_dst Input/output picture. Will be modified in-place.
67 * @param i_field Darken which field? 0 = top, 1 = bottom.
68 * @param i_strength Strength of effect: 1, 2 or 3 (division by 2, 4 or 8).
69 * @see RenderPhosphor()
72 static void DarkenField( picture_t
*p_dst
,
73 const int i_field
, const int i_strength
,
76 assert( p_dst
!= NULL
);
77 assert( i_field
== 0 || i_field
== 1 );
78 assert( i_strength
>= 1 && i_strength
<= 3 );
80 /* Bitwise ANDing with this clears the i_strength highest bits
82 const uint8_t remove_high_u8
= 0xFF >> i_strength
;
83 const uint64_t remove_high_u64
= remove_high_u8
*
84 INT64_C(0x0101010101010101);
88 For luma, the operation is just a shift + bitwise AND, so we vectorize
89 even in the C version.
91 There is an MMX version too, because it performs about twice faster.
93 int i_plane
= Y_PLANE
;
94 uint8_t *p_out
, *p_out_end
;
95 int w
= p_dst
->p
[i_plane
].i_visible_pitch
;
96 p_out
= p_dst
->p
[i_plane
].p_pixels
;
97 p_out_end
= p_out
+ p_dst
->p
[i_plane
].i_pitch
98 * p_dst
->p
[i_plane
].i_visible_lines
;
100 /* skip first line for bottom field */
102 p_out
+= p_dst
->p
[i_plane
].i_pitch
;
104 int wm8
= w
% 8; /* remainder */
105 int w8
= w
- wm8
; /* part of width that is divisible by 8 */
106 for( ; p_out
< p_out_end
; p_out
+= 2*p_dst
->p
[i_plane
].i_pitch
)
108 uint64_t *po
= (uint64_t *)p_out
;
111 for( ; x
< w8
; x
+= 8, ++po
)
112 (*po
) = ( ((*po
) >> i_strength
) & remove_high_u64
);
114 /* handle the width remainder */
115 uint8_t *po_temp
= (uint8_t *)po
;
116 for( ; x
< w
; ++x
, ++po_temp
)
117 (*po_temp
) = ( ((*po_temp
) >> i_strength
) & remove_high_u8
);
120 /* Process chroma if the field chromas are independent.
122 The origin (black) is at YUV = (0, 128, 128) in the uint8 format.
123 The chroma processing is a bit more complicated than luma,
124 and needs MMX for vectorization.
128 for( i_plane
++ /* luma already handled*/;
129 i_plane
< p_dst
->i_planes
;
132 int w
= p_dst
->p
[i_plane
].i_visible_pitch
;
133 p_out
= p_dst
->p
[i_plane
].p_pixels
;
134 p_out_end
= p_out
+ p_dst
->p
[i_plane
].i_pitch
135 * p_dst
->p
[i_plane
].i_visible_lines
;
137 /* skip first line for bottom field */
139 p_out
+= p_dst
->p
[i_plane
].i_pitch
;
141 for( ; p_out
< p_out_end
; p_out
+= 2*p_dst
->p
[i_plane
].i_pitch
)
143 /* Handle the width remainder */
145 for( int x
= 0; x
< w
; ++x
, ++po
)
146 (*po
) = 128 + ( ((*po
) - 128) / (1 << i_strength
) );
148 } /* for i_plane... */
149 } /* if process_chroma */
#ifdef CAN_COMPILE_MMXEXT
/**
 * MMX variant of DarkenField(): dims (darkens) the given field of the
 * given picture in-place.
 *
 * NOTE(review): the line directly preceding this definition is not visible
 * in the reviewed text; if it carried a function attribute macro, keep it.
 *
 * @param p_dst Input/output picture. Will be modified in-place.
 * @param i_field Darken which field? 0 = top, 1 = bottom.
 * @param i_strength Strength of effect: 1, 2 or 3 (division by 2, 4 or 8).
 * @param process_chroma If true, the planes after luma are dimmed too.
 * @see DarkenField()
 */
static void DarkenFieldMMX( picture_t *p_dst,
                            const int i_field, const int i_strength,
                            bool process_chroma )
{
    assert( p_dst != NULL );
    assert( i_field == 0 || i_field == 1 );
    assert( i_strength >= 1 && i_strength <= 3 );

    uint64_t i_strength_u64 = i_strength; /* needs to know number of bits */
    const uint8_t  remove_high_u8 = 0xFF >> i_strength;
    const uint64_t remove_high_u64 = remove_high_u8 *
                                            INT64_C(0x0101010101010101);

    /* Process luma: shift each group of eight pixels right, then mask off
       the bits that leaked across byte boundaries. */
    int i_plane = Y_PLANE;
    uint8_t *p_out, *p_out_end;
    int w = p_dst->p[i_plane].i_visible_pitch;
    p_out = p_dst->p[i_plane].p_pixels;
    p_out_end = p_out + p_dst->p[i_plane].i_pitch
                      * p_dst->p[i_plane].i_visible_lines;

    /* skip first line for bottom field */
    if( i_field == 1 )
        p_out += p_dst->p[i_plane].i_pitch;

    int wm8 = w % 8;   /* remainder */
    int w8  = w - wm8; /* part of width that is divisible by 8 */
    for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
    {
        uint64_t *po = (uint64_t *)p_out;
        int x = 0;

        movq_m2r( i_strength_u64,  mm1 );
        movq_m2r( remove_high_u64, mm2 );
        for( ; x < w8; x += 8 )
        {
            movq_m2r( (*po), mm0 );

            psrlq_r2r( mm1, mm0 );
            pand_r2r(  mm2, mm0 );

            movq_r2m( mm0, (*po++) );
        }

        /* handle the width remainder */
        uint8_t *po_temp = (uint8_t *)po;
        for( ; x < w; ++x, ++po_temp )
            (*po_temp) = ( ((*po_temp) >> i_strength) & remove_high_u8 );
    }

    /* Process chroma if the field chromas are independent.

       The origin (black) is at YUV = (0, 128, 128) in the uint8 format.
       The chroma processing is a bit more complicated than luma,
       and needs MMX for vectorization.
    */
    if( process_chroma )
    {
        for( i_plane++ /* luma already handled */;
             i_plane < p_dst->i_planes;
             i_plane++ )
        {
            int w = p_dst->p[i_plane].i_visible_pitch;
            int wm8 = w % 8;   /* remainder */
            int w8  = w - wm8; /* part of width that is divisible by 8 */

            p_out = p_dst->p[i_plane].p_pixels;
            p_out_end = p_out + p_dst->p[i_plane].i_pitch
                              * p_dst->p[i_plane].i_visible_lines;

            /* skip first line for bottom field */
            if( i_field == 1 )
                p_out += p_dst->p[i_plane].i_pitch;

            for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
            {
                int x = 0;

                /* See also easy-to-read C version below. */
                static alignas (8) const mmx_t b128 = {
                    .uq = 0x8080808080808080ULL
                };

                movq_m2r( b128,            mm5 );
                movq_m2r( i_strength_u64,  mm6 );
                movq_m2r( remove_high_u64, mm7 );

                uint64_t *po8 = (uint64_t *)p_out;
                for( ; x < w8; x += 8 )
                {
                    movq_m2r( (*po8), mm0 );

                    movq_r2r( mm5, mm2 ); /* 128 */
                    movq_r2r( mm0, mm1 ); /* copy of data */
                    psubusb_r2r( mm2, mm1 ); /* mm1 = max(data - 128, 0) */
                    psubusb_r2r( mm0, mm2 ); /* mm2 = max(128 - data, 0) */

                    /* >> i_strength, applied to both magnitudes */
                    psrlq_r2r( mm6, mm1 );
                    psrlq_r2r( mm6, mm2 );
                    pand_r2r(  mm7, mm1 );
                    pand_r2r(  mm7, mm2 );

                    /* collect results from pos./neg. parts */
                    psubb_r2r( mm2, mm1 );
                    paddb_r2r( mm5, mm1 );

                    movq_r2m( mm1, (*po8++) );
                }

                /* C version - handle the width remainder.
                   Continue right after the last vectorized group so that
                   the tail bytes (x >= w8) are the ones being processed. */
                uint8_t *po = (uint8_t *)po8;
                for( ; x < w; ++x, ++po )
                    (*po) = 128 + ( ((*po) - 128) / (1 << i_strength) );
            } /* for p_out... */
        } /* for i_plane... */
    } /* if process_chroma */

    /* Leave MMX state so that later x87 floating-point code works.
       NOTE(review): assumes emms() comes from the same header as the other
       MMX macros used here - confirm. */
    emms();
}
#endif
275 /*****************************************************************************
277 *****************************************************************************/
279 /* See header for function doc. */
280 int RenderPhosphor( filter_t
*p_filter
,
281 picture_t
*p_dst
, picture_t
*p_pic
,
282 int i_order
, int i_field
)
285 assert( p_filter
!= NULL
);
286 assert( p_dst
!= NULL
);
287 assert( i_order
>= 0 && i_order
<= 2 ); /* 2 = soft field repeat */
288 assert( i_field
== 0 || i_field
== 1 );
290 filter_sys_t
*p_sys
= p_filter
->p_sys
;
292 /* Last two input frames */
293 picture_t
*p_in
= p_sys
->context
.pp_history
[HISTORY_SIZE
-1];
294 picture_t
*p_old
= p_sys
->context
.pp_history
[HISTORY_SIZE
-2];
296 /* Use the same input picture as "old" at the first frame after startup */
300 /* If the history mechanism has failed, we can't do anything. */
304 assert( p_old
!= NULL
);
305 assert( p_in
!= NULL
);
307 /* Decide sources for top & bottom fields of output. */
308 picture_t
*p_in_top
= p_in
;
309 picture_t
*p_in_bottom
= p_in
;
310 /* For the first output field this frame,
311 grab "old" field from previous frame. */
314 if( i_field
== 0 ) /* rendering top field */
316 else /* i_field == 1, rendering bottom field */
320 compose_chroma_t cc
= CC_ALTLINE
;
321 if( 2 * p_sys
->chroma
->p
[1].h
.num
== p_sys
->chroma
->p
[1].h
.den
&&
322 2 * p_sys
->chroma
->p
[2].h
.num
== p_sys
->chroma
->p
[2].h
.den
)
324 /* Only 420 like chroma */
325 switch( p_sys
->phosphor
.i_chroma_for_420
)
333 else /* i_field == 1 */
334 cc
= CC_SOURCE_BOTTOM
;
343 /* The above are the only possibilities, if there are no bugs. */
344 vlc_assert_unreachable();
348 ComposeFrame( p_filter
, p_dst
, p_in_top
, p_in_bottom
, cc
, p_filter
->fmt_in
.video
.i_chroma
== VLC_CODEC_YV12
);
350 /* Simulate phosphor light output decay for the old field.
352 The dimmer can also be switched off in the configuration, but that is
353 more of a technical curiosity or an educational toy for advanced users
354 than a useful deinterlacer mode (although it does make telecined
355 material look slightly better than without any filtering).
357 In most use cases the dimmer is used.
359 if( p_sys
->phosphor
.i_dimmer_strength
> 0 )
361 #ifdef CAN_COMPILE_MMXEXT
362 if( vlc_CPU_MMXEXT() )
363 DarkenFieldMMX( p_dst
, !i_field
, p_sys
->phosphor
.i_dimmer_strength
,
364 p_sys
->chroma
->p
[1].h
.num
== p_sys
->chroma
->p
[1].h
.den
&&
365 p_sys
->chroma
->p
[2].h
.num
== p_sys
->chroma
->p
[2].h
.den
);
368 DarkenField( p_dst
, !i_field
, p_sys
->phosphor
.i_dimmer_strength
,
369 p_sys
->chroma
->p
[1].h
.num
== p_sys
->chroma
->p
[1].h
.den
&&
370 p_sys
->chroma
->p
[2].h
.num
== p_sys
->chroma
->p
[2].h
.den
);