DVDnav: fix .ifo files opening
[vlc/vlc-skelet.git] / modules / video_filter / sepia.c
blob249d10feb9b8f5d2135898de8882b37fbdbd580d
1 /*****************************************************************************
2 * sepia.c : Sepia video plugin for vlc
3 *****************************************************************************
4 * Copyright (C) 2010 the VideoLAN team
5 * $Id$
7 * Authors: Branko Kokanovic <branko.kokanovic@gmail.com>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 /*****************************************************************************
25 * Preamble
26 *****************************************************************************/
28 #ifdef HAVE_CONFIG_H
29 # include "config.h"
30 #endif
32 #include <vlc_common.h>
33 #include <vlc_plugin.h>
34 #include <vlc_filter.h>
35 #include <vlc_cpu.h>
37 #include <assert.h>
38 #include "filter_picture.h"
40 /*****************************************************************************
41 * Local prototypes
42 *****************************************************************************/
43 static int Create ( vlc_object_t * );
44 static void Destroy ( vlc_object_t * );
46 static void RVSepia( picture_t *, picture_t *, int );
47 static void PlanarI420Sepia( picture_t *, picture_t *, int);
48 static void PackedYUVSepia( picture_t *, picture_t *, int);
49 static picture_t *Filter( filter_t *, picture_t * );
50 inline void Sepia8ySSE41( uint8_t *, const uint8_t *, volatile uint8_t * );
51 inline void Memcpy8BMMX( uint8_t *, const uint8_t * );
/* NULL-terminated list of option names handed to config_ChainParse() in
 * Create(); the names are written without the "sepia-" prefix. */
static const char *const ppsz_filter_options[] = {
    "intensity",
    NULL
};
56 /*****************************************************************************
57 * Module descriptor
58 *****************************************************************************/
59 #define SEPIA_INTENSITY_TEXT N_("Sepia intensity")
60 #define SEPIA_INTENSITY_LONGTEXT N_("Intensity of sepia effect" )
62 #define CFG_PREFIX "sepia-"
64 vlc_module_begin ()
65 set_description( N_("Sepia video filter") )
66 set_shortname( N_("Sepia" ) )
67 set_help( N_("Gives video a warmer tone by applying sepia effect") )
68 set_category( CAT_VIDEO )
69 set_subcategory( SUBCAT_VIDEO_VFILTER )
70 set_capability( "video filter2", 0 )
71 add_integer_with_range( CFG_PREFIX "intensity", 100, 0, 255, NULL,
72 SEPIA_INTENSITY_TEXT, SEPIA_INTENSITY_LONGTEXT,
73 false )
74 set_callbacks( Create, Destroy )
75 vlc_module_end ()
77 /*****************************************************************************
78 * callback prototypes
79 *****************************************************************************/
80 static int FilterCallback( vlc_object_t *, char const *,
81 vlc_value_t, vlc_value_t, void * );
83 typedef void (*SepiaFunction)( picture_t *, picture_t *, int );
85 static const struct
87 vlc_fourcc_t i_chroma;
88 SepiaFunction pf_sepia;
89 } p_sepia_cfg[] = {
90 { VLC_CODEC_I420, PlanarI420Sepia },
91 { VLC_CODEC_RGB24, RVSepia },
92 { VLC_CODEC_RGB32, RVSepia },
93 { VLC_CODEC_UYVY, PackedYUVSepia },
94 { VLC_CODEC_VYUY, PackedYUVSepia },
95 { VLC_CODEC_YUYV, PackedYUVSepia },
96 { VLC_CODEC_YVYU, PackedYUVSepia },
97 { 0, NULL }
100 /*****************************************************************************
101 * filter_sys_t: adjust filter method descriptor
102 *****************************************************************************/
103 struct filter_sys_t
105 SepiaFunction pf_sepia;
106 int i_intensity;
107 vlc_spinlock_t lock;
110 /*****************************************************************************
111 * Create: allocates Sepia video thread output method
112 *****************************************************************************
113 * This function allocates and initializes a Sepia vout method.
114 *****************************************************************************/
115 static int Create( vlc_object_t *p_this )
117 filter_t *p_filter = (filter_t *)p_this;
118 filter_sys_t *p_sys;
120 /* Allocate structure */
121 p_sys = p_filter->p_sys = malloc( sizeof( filter_sys_t ) );
122 if( p_filter->p_sys == NULL )
123 return VLC_ENOMEM;
125 p_sys->pf_sepia = NULL;
127 for( int i = 0; p_sepia_cfg[i].i_chroma != 0; i++ )
129 if( p_sepia_cfg[i].i_chroma != p_filter->fmt_in.video.i_chroma )
130 continue;
131 p_sys->pf_sepia = p_sepia_cfg[i].pf_sepia;
134 if( p_sys->pf_sepia == NULL )
136 msg_Err( p_filter, "Unsupported input chroma (%4.4s)",
137 (char*)&(p_filter->fmt_in.video.i_chroma) );
138 free( p_sys );
139 return VLC_EGENERIC;
142 config_ChainParse( p_filter, CFG_PREFIX, ppsz_filter_options,
143 p_filter->p_cfg );
144 p_sys->i_intensity= var_CreateGetIntegerCommand( p_filter,
145 CFG_PREFIX "intensity" );
147 vlc_spin_init( &p_sys->lock );
149 var_AddCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
151 p_filter->pf_video_filter = Filter;
153 return VLC_SUCCESS;
156 /*****************************************************************************
157 * Destroy: destroy sepia video thread output method
158 *****************************************************************************
159 * Terminate an output method
160 *****************************************************************************/
161 static void Destroy( vlc_object_t *p_this )
163 filter_t *p_filter = (filter_t *)p_this;
165 var_DelCallback( p_filter, CFG_PREFIX "intensity", FilterCallback, NULL );
167 vlc_spin_destroy( &p_filter->p_sys->lock );
168 free( p_filter->p_sys );
171 /*****************************************************************************
172 * Render: displays previously rendered output
173 *****************************************************************************
174 * This function send the currently rendered image to sepia image, waits
175 * until it is displayed and switch the two rendering buffers, preparing next
176 * frame.
177 *****************************************************************************/
178 static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
180 picture_t *p_outpic;
181 int intensity;
183 if( !p_pic ) return NULL;
185 filter_sys_t *p_sys = p_filter->p_sys;
186 vlc_spin_lock( &p_sys->lock );
187 intensity = p_sys->i_intensity;
188 vlc_spin_unlock( &p_sys->lock );
190 p_outpic = filter_NewPicture( p_filter );
191 if( !p_outpic )
193 msg_Warn( p_filter, "can't get output picture" );
194 picture_Release( p_pic );
195 return NULL;
198 p_sys->pf_sepia( p_pic, p_outpic, intensity );
200 return CopyInfoAndRelease( p_outpic, p_pic );
203 /*****************************************************************************
204 * PlanarI420Sepia: Applies sepia to one frame of the planar I420 video
205 *****************************************************************************
206 * This function applies sepia effect to one frame of the video by iterating
207 * through video lines. We iterate for every two lines and for every two pixels
208 * in line to calculate new sepia values for four y components as well for u
209 * and v components.
210 *****************************************************************************/
211 static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
212 int i_intensity )
214 // prepared values to copy for U and V channels
215 const uint8_t filling_const_8u = 128 - i_intensity / 6;
216 const uint8_t filling_const_8v = 128 + i_intensity / 14;
218 #if defined(CAN_COMPILE_SSE4_1) && 1
219 if (vlc_CPU() & CPU_CAPABILITY_SSE4_1)
221 /*prepare array of values to copy with mmx, compute only once
222 to improve speed */
223 volatile uint8_t intensity_array[8] = { i_intensity, i_intensity,
224 i_intensity, i_intensity, i_intensity, i_intensity,
225 i_intensity, i_intensity };
226 const uint8_t filling_array_8u[8] =
227 { filling_const_8u, filling_const_8u, filling_const_8u,
228 filling_const_8u, filling_const_8u, filling_const_8u,
229 filling_const_8u, filling_const_8u };
230 const uint8_t filling_array_8v[8] =
231 { filling_const_8v, filling_const_8v, filling_const_8v,
232 filling_const_8v, filling_const_8v, filling_const_8v,
233 filling_const_8v, filling_const_8v };
235 /* iterate for every two visible line in the frame */
236 for (int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
238 const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
239 const int i_dy_line2_start =
240 (y + 1) * p_outpic->p[Y_PLANE].i_pitch;
241 const int i_du_line_start =
242 (y / 2) * p_outpic->p[U_PLANE].i_pitch;
243 const int i_dv_line_start =
244 (y / 2) * p_outpic->p[V_PLANE].i_pitch;
245 int x = 0;
246 /* iterate for every visible line in the frame (eight values at once) */
247 for (; x < p_pic->p[Y_PLANE].i_visible_pitch - 15; x += 16)
249 /* Compute yellow channel values with asm function */
250 Sepia8ySSE41(
251 &p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
252 &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x],
253 intensity_array );
254 Sepia8ySSE41(
255 &p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
256 &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x],
257 intensity_array );
258 Sepia8ySSE41(
259 &p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
260 &p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 8],
261 intensity_array );
262 Sepia8ySSE41(
263 &p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
264 &p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 8],
265 intensity_array );
266 /* Copy precomputed values to destination image memory location */
267 Memcpy8BMMX(
268 &p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)],
269 filling_array_8u );
270 Memcpy8BMMX(&p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)],
271 filling_array_8v );
273 /* Completing the job, the cycle above takes really big chunks, so
274 this makes sure the job will be done completely */
275 for (; x < p_pic->p[Y_PLANE].i_visible_pitch - 1; x += 2)
277 // y = y - y/4 {to prevent overflow} + intensity / 4
278 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
279 p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
280 (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
281 (i_intensity >> 2);
282 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
283 p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
284 (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
285 (i_intensity >> 2);
286 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
287 p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
288 (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
289 (i_intensity >> 2);
290 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
291 p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
292 (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
293 (i_intensity >> 2);
294 // u = 128 {half => B&W} - intensity / 6
295 p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
296 filling_const_8u;
297 // v = 128 {half => B&W} + intensity / 14
298 p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
299 filling_const_8v;
302 } else
303 #endif
305 /* iterate for every two visible line in the frame */
306 for( int y = 0; y < p_pic->p[Y_PLANE].i_visible_lines - 1; y += 2)
308 const int i_dy_line1_start = y * p_outpic->p[Y_PLANE].i_pitch;
309 const int i_dy_line2_start = ( y + 1 ) * p_outpic->p[Y_PLANE].i_pitch;
310 const int i_du_line_start = (y/2) * p_outpic->p[U_PLANE].i_pitch;
311 const int i_dv_line_start = (y/2) * p_outpic->p[V_PLANE].i_pitch;
312 // to prevent sigsegv if one pic is smaller (theoretically)
313 int i_picture_size_limit = p_pic->p[Y_PLANE].i_visible_pitch
314 < p_outpic->p[Y_PLANE].i_visible_pitch
315 ? (p_pic->p[Y_PLANE].i_visible_pitch - 1) :
316 (p_outpic->p[Y_PLANE].i_visible_pitch - 1);
317 /* iterate for every two visible line in the frame */
318 for( int x = 0; x < i_picture_size_limit; x += 2)
320 // y = y - y/4 {to prevent overflow} + intensity / 4
321 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] =
322 p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] -
323 (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x] >> 2) +
324 (i_intensity >> 2);
325 p_outpic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] =
326 p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] -
327 (p_pic->p[Y_PLANE].p_pixels[i_dy_line1_start + x + 1] >> 2) +
328 (i_intensity >> 2);
329 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] =
330 p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] -
331 (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x] >> 2) +
332 (i_intensity >> 2);
333 p_outpic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] =
334 p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] -
335 (p_pic->p[Y_PLANE].p_pixels[i_dy_line2_start + x + 1] >> 2) +
336 (i_intensity >> 2);
337 // u = 128 {half => B&W} - intensity / 6
338 p_outpic->p[U_PLANE].p_pixels[i_du_line_start + (x / 2)] =
339 filling_const_8u;
340 // v = 128 {half => B&W} + intensity / 14
341 p_outpic->p[V_PLANE].p_pixels[i_dv_line_start + (x / 2)] =
342 filling_const_8v;
348 /*****************************************************************************
349 * PackedYUVSepia: Applies sepia to one frame of the packed YUV video
350 *****************************************************************************
351 * This function applies sepia effext to one frame of the video by iterating
352 * through video lines. In every pass, we calculate new values for pixels
353 * (UYVY, VYUY, YUYV and YVYU formats are supported)
354 *****************************************************************************/
355 static void PackedYUVSepia( picture_t *p_pic, picture_t *p_outpic,
356 int i_intensity )
358 uint8_t *p_in, *p_in_end, *p_line_end, *p_out;
359 int i_yindex = 1, i_uindex = 2, i_vindex = 0;
361 GetPackedYuvOffsets( p_outpic->format.i_chroma,
362 &i_yindex, &i_uindex, &i_vindex );
364 // prepared values to copy for U and V channels
365 const uint8_t filling_const_8u = 128 - i_intensity / 6;
366 const uint8_t filling_const_8v = 128 + i_intensity / 14;
368 p_in = p_pic->p[0].p_pixels;
369 p_in_end = p_in + p_pic->p[0].i_visible_lines
370 * p_pic->p[0].i_pitch;
371 p_out = p_outpic->p[0].p_pixels;
372 #if defined(CAN_COMPILE_SSE4_1)
373 if (vlc_CPU() & CPU_CAPABILITY_SSE4_1)
375 /*prepare array of values to copy with mmx, compute only once
376 to improve speed */
377 volatile uint8_t intensity_array[8] = { i_intensity, i_intensity,
378 i_intensity, i_intensity, i_intensity, i_intensity,
379 i_intensity,
380 i_intensity
382 const uint8_t filling_array_8u[8] =
383 { filling_const_8u, filling_const_8u,
384 filling_const_8u, filling_const_8u, filling_const_8u,
385 filling_const_8u,
386 filling_const_8u, filling_const_8u
388 const uint8_t filling_array_8v[8] =
389 { filling_const_8v, filling_const_8v,
390 filling_const_8v, filling_const_8v, filling_const_8v,
391 filling_const_8v,
392 filling_const_8v, filling_const_8v
395 /* iterate for every two visible line in the frame */
396 while (p_in < p_in_end)
398 p_line_end = p_in + p_pic->p[0].i_visible_pitch;
399 while (p_in < p_line_end)
401 Sepia8ySSE41(&p_out[i_yindex], &p_in[i_yindex],
402 intensity_array);
403 Sepia8ySSE41(&p_out[i_yindex + 8], &p_in[i_yindex + 8],
404 intensity_array);
405 Sepia8ySSE41(&p_out[i_yindex + 16], &p_in[i_yindex + 16],
406 intensity_array);
407 Sepia8ySSE41(&p_out[i_yindex + 24], &p_in[i_yindex + 24],
408 intensity_array);
409 Memcpy8BMMX(&p_out[i_uindex], filling_array_8u);
410 Memcpy8BMMX(&p_out[i_vindex], filling_array_8v);
412 p_in += 32;
413 p_out += 32;
415 while (p_in < p_line_end)
417 p_out[i_yindex] =
418 p_in[i_yindex] - (p_in[i_yindex] >> 2) +
419 (i_intensity >> 2);
420 p_out[i_yindex + 2] =
421 p_in[i_yindex + 2] - (p_in[i_yindex + 2] >> 2) +
422 (i_intensity >> 2);
423 p_out[i_uindex] = filling_const_8u;
424 p_out[i_vindex] = filling_const_8v;
425 p_in += 4;
426 p_out += 4;
428 p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
429 p_out += p_outpic->p[0].i_pitch
430 - p_outpic->p[0].i_visible_pitch;
432 } else
433 #endif
435 while( p_in < p_in_end )
437 p_line_end = p_in + p_pic->p[0].i_visible_pitch;
438 while( p_in < p_line_end )
440 /* calculate new, sepia values */
441 p_out[i_yindex] =
442 p_in[i_yindex] - (p_in[i_yindex] >> 2) + (i_intensity >> 2);
443 p_out[i_yindex + 2] =
444 p_in[i_yindex + 2] - (p_in[i_yindex + 2] >> 2)
445 + (i_intensity >> 2);
446 p_out[i_uindex] = filling_const_8u;
447 p_out[i_vindex] = filling_const_8v;
448 p_in += 4;
449 p_out += 4;
451 p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
452 p_out += p_outpic->p[0].i_pitch
453 - p_outpic->p[0].i_visible_pitch;
458 /*****************************************************************************
459 * RVSepia: Applies sepia to one frame of the RV24/RV32 video
460 *****************************************************************************
461 * This function applies sepia effect to one frame of the video by iterating
462 * through video lines and calculating new values for every byte in chunks of
463 * 3 (RV24) or 4 (RV32) bytes.
464 *****************************************************************************/
465 static void RVSepia( picture_t *p_pic, picture_t *p_outpic, int i_intensity )
467 #define SCALEBITS 10
468 #define ONE_HALF (1 << (SCALEBITS - 1))
469 #define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5))
470 uint8_t *p_in, *p_in_end, *p_line_end, *p_out;
471 bool b_isRV32 = p_pic->format.i_chroma == VLC_CODEC_RGB32;
472 int i_rindex = 0, i_gindex = 1, i_bindex = 2;
474 GetPackedRgbIndexes( &p_outpic->format, &i_rindex, &i_gindex, &i_bindex );
476 p_in = p_pic->p[0].p_pixels;
477 p_in_end = p_in + p_pic->p[0].i_visible_lines
478 * p_pic->p[0].i_pitch;
479 p_out = p_outpic->p[0].p_pixels;
481 /* Precompute values constant for this certain i_intensity, using the same
482 * formula as YUV functions above */
483 uint8_t r_intensity = (( FIX( 1.40200 * 255.0 / 224.0 ) * (i_intensity * 14)
484 + ONE_HALF )) >> SCALEBITS;
485 uint8_t g_intensity = (( - FIX(0.34414*255.0/224.0) * ( - i_intensity / 6 )
486 - FIX( 0.71414 * 255.0 / 224.0) * ( i_intensity * 14 )
487 + ONE_HALF )) >> SCALEBITS;
488 uint8_t b_intensity = (( FIX( 1.77200 * 255.0 / 224.0) * ( - i_intensity / 6 )
489 + ONE_HALF )) >> SCALEBITS;
491 while (p_in < p_in_end)
493 p_line_end = p_in + p_pic->p[0].i_visible_pitch;
494 while (p_in < p_line_end)
496 /* do sepia: this calculation is based on the formula to calculate
497 * YUV->RGB and RGB->YUV (in filter_picture.h) mode and that
498 * y = y - y/4 + intensity/4 . As Y is the only channel that changes
499 * through the whole image. After that, precomputed values are added
500 * for each RGB channel and saved in the output image.
501 * FIXME: needs cleanup */
502 uint8_t i_y = ((( 66 * p_in[i_rindex] + 129 * p_in[i_gindex] + 25
503 * p_in[i_bindex] + 128 ) >> 8 ) * FIX(255.0/219.0))
504 - (((( 66 * p_in[i_rindex] + 129 * p_in[i_gindex] + 25
505 * p_in[i_bindex] + 128 ) >> 8 )
506 * FIX( 255.0 / 219.0 )) >> 2 ) + ( i_intensity >> 2 );
507 p_out[i_rindex] = vlc_uint8(i_y + r_intensity);
508 p_out[i_gindex] = vlc_uint8(i_y + g_intensity);
509 p_out[i_bindex] = vlc_uint8(i_y + b_intensity);
510 p_in += 3;
511 p_out += 3;
512 /* for rv32 we take 4 chunks at the time */
513 if (b_isRV32) {
514 /* alpha channel stays the same */
515 *p_out++ = *p_in++;
519 p_in += p_pic->p[0].i_pitch - p_pic->p[0].i_visible_pitch;
520 p_out += p_outpic->p[0].i_pitch
521 - p_outpic->p[0].i_visible_pitch;
523 #undef SCALEBITS
524 #undef ONE_HALF
525 #undef FIX
/*****************************************************************************
 * Sepia8ySSE41: sepia transform of eight consecutive luma bytes
 *****************************************************************************
 * For i in 0..7: dst[i] = src[i] - src[i]/4 + i_intensity[i]/4.
 * With 8-bit inputs the result never exceeds 255, so no clamping is needed.
 *
 *  dst:          receives 8 bytes
 *  src:          8 luma bytes to transform
 *  i_intensity:  8 intensity bytes (callers pass eight copies of one value)
 *
 * When SSE4.1 inline assembly is available the bytes are widened to 16-bit
 * lanes and processed in one go; otherwise an equivalent C loop is used.
 *****************************************************************************/

/* C99: a function whose file-scope declarations are all plain `inline` has
 * only an inline definition, and a call that is not inlined would then fail
 * to link. This declaration guarantees a real definition is emitted in that
 * case. */
extern inline void Sepia8ySSE41(uint8_t * dst, const uint8_t * src,
                                volatile uint8_t * i_intensity);

inline void Sepia8ySSE41(uint8_t * dst, const uint8_t * src,
                         volatile uint8_t * i_intensity)
{
#if defined(CAN_COMPILE_SSE4_1)
    /* AT&T operand order: "op src, dst" computes dst = dst op src. */
    __asm__ volatile (
        "pmovzxbw      (%1), %%xmm1\n"  // xmm1 = y, widened to 8 words
        "pmovzxbw      (%2), %%xmm3\n"  // xmm3 = intensity, 8 words
        "movdqa      %%xmm1, %%xmm2\n"  // xmm2 = y
        "psrlw           $2, %%xmm2\n"  // xmm2 = y / 4
        "psubw       %%xmm2, %%xmm1\n"  // xmm1 = y - y / 4
        "psrlw           $2, %%xmm3\n"  // xmm3 = intensity / 4
        "paddw       %%xmm3, %%xmm1\n"  // xmm1 = y - y / 4 + intensity / 4
        "packuswb    %%xmm1, %%xmm1\n"  // narrow the words back to bytes
        "movq        %%xmm1, (%0)  \n"  // store 8 bytes to dst
        :
        : "r" (dst), "r" (src), "r" (i_intensity)
        : "memory", "xmm1", "xmm2", "xmm3" );
#else
    for( int i = 0; i < 8; i++ )
        dst[i] = src[i] - (src[i] >> 2) + (i_intensity[i] >> 2);
#endif
}
/*****************************************************************************
 * Memcpy8BMMX: Copies exactly 8 bytes from src to dst
 *****************************************************************************
 * Written as plain C so that it behaves the same in every build
 * configuration and needs no register clobber bookkeeping.
 * src and dst are expected not to overlap (the callers in this file pass a
 * local constant array as src).
 *****************************************************************************/

/* C99: a function whose file-scope declarations are all plain `inline` has
 * only an inline definition, and a call that is not inlined would then fail
 * to link. This declaration guarantees a real definition is emitted in that
 * case. */
extern inline void Memcpy8BMMX(uint8_t * dst, const uint8_t * src);

inline void Memcpy8BMMX(uint8_t * dst, const uint8_t * src)
{
    for( int i = 0; i < 8; i++ )
        dst[i] = src[i];
}
572 static int FilterCallback ( vlc_object_t *p_this, char const *psz_var,
573 vlc_value_t oldval, vlc_value_t newval,
574 void *p_data )
576 VLC_UNUSED(psz_var); VLC_UNUSED(oldval); VLC_UNUSED(p_data);
577 filter_t *p_filter = (filter_t*)p_this;
578 filter_sys_t *p_sys = p_filter->p_sys;
580 vlc_spin_lock( &p_sys->lock );
581 p_sys->i_intensity = newval.i_int;
582 vlc_spin_unlock( &p_sys->lock );
584 return VLC_SUCCESS;