/*
 * gitweb page header: 20% faster hqdn3d on x86_64
 * [mplayer/glamo.git] / libmpcodecs / vf_eq2.c
 * blob e7ee9a4b0c378bc2b6a48940238823d5c9c66f48
 */

/*
 * vf_eq2.c
 *
 * Software equalizer (brightness, contrast, gamma, saturation)
 *
 * Hampa Hug <hampa@hampa.ch> (original LUT gamma/contrast/brightness filter)
 * Daniel Moreno <comac@comac.darktech.org> (saturation, R/G/B gamma support)
 * Richard Felker (original MMX contrast/brightness code (vf_eq.c))
 * Michael Niedermayer <michalni@gmx.at> (LUT16)
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <inttypes.h>

#include "config.h"
#include "mp_msg.h"
#include "cpudetect.h"

#include "img_format.h"
#include "mp_image.h"
#include "vf.h"
/* Build the additional 16-bit table (two pixels per lookup) in create_lut()
 * and use it in apply_lut(). */
#define LUT16

/* Per channel parameters */
typedef struct eq2_param_t {
  /* 8-bit transfer table: output value for every possible input value */
  unsigned char lut[256];
#ifdef LUT16
  /* Same mapping applied to both bytes of a 16-bit word at once */
  uint16_t lut16[256*256];
#endif
  /* Non-zero once create_lut() has rebuilt the tables; the setters clear it */
  int lut_clean;

  /* Plane processing routine chosen by check_values(); put_image() passes
   * the source plane through untouched when this is NULL. */
  void (*adjust) (struct eq2_param_t *par, unsigned char *dst, unsigned char *src,
    unsigned w, unsigned h, unsigned dstride, unsigned sstride);

  double c;   /* linear gain: contrast for plane 0, saturation for planes 1/2 */
  double b;   /* offset (brightness) */
  double g;   /* gamma */
  double w;   /* gamma weight: blend factor between linear and gamma curve */
} eq2_param_t;
45 typedef struct vf_priv_s {
46 eq2_param_t param[3];
48 double contrast;
49 double brightness;
50 double saturation;
52 double gamma;
53 double gamma_weight;
54 double rgamma;
55 double ggamma;
56 double bgamma;
58 unsigned buf_w[3];
59 unsigned buf_h[3];
60 unsigned char *buf[3];
61 } vf_eq2_t;
64 static
65 void create_lut (eq2_param_t *par)
67 unsigned i;
68 double g, v;
69 double lw, gw;
71 g = par->g;
72 gw = par->w;
73 lw = 1.0 - gw;
75 if ((g < 0.001) || (g > 1000.0)) {
76 g = 1.0;
79 g = 1.0 / g;
81 for (i = 0; i < 256; i++) {
82 v = (double) i / 255.0;
83 v = par->c * (v - 0.5) + 0.5 + par->b;
85 if (v <= 0.0) {
86 par->lut[i] = 0;
88 else {
89 v = v*lw + pow(v, g)*gw;
91 if (v >= 1.0) {
92 par->lut[i] = 255;
94 else {
95 par->lut[i] = (unsigned char) (256.0 * v);
100 #ifdef LUT16
101 for(i=0; i<256*256; i++){
102 par->lut16[i]= par->lut[i&0xFF] + (par->lut[i>>8]<<8);
104 #endif
106 par->lut_clean = 1;
#if HAVE_MMX
/*
 * Contrast/brightness only path: dst = saturate (((src * contrast) >> 12)
 * + brightness) for every pixel of a w x h plane.  Groups of eight pixels are
 * handled by the MMX loop, the remaining w & 7 pixels of each row by scalar
 * code.
 *
 * par      channel parameters; only par->c and par->b are read
 * dst/src  first pixel of the destination / source plane
 * dstride/sstride  distance in bytes between consecutive rows
 *
 * NOTE(review): contrast is handed to the MMX code as 16-bit values
 * (contvec), so par->c * 4096 has to fit into a short for the vector part to
 * be meaningful.
 */
static
void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
  unsigned w, unsigned h, unsigned dstride, unsigned sstride)
{
  unsigned i;
  int      contrast, brightness;
  unsigned dstep, sstep;
  int      pel;
  short    brvec[4];
  short    contvec[4];

  contrast = (int) (par->c * 256 * 16);
  brightness = ((int) (100.0 * par->b + 100.0) * 511) / 200 - 128 - contrast / 32;

  brvec[0] = brvec[1] = brvec[2] = brvec[3] = brightness;
  contvec[0] = contvec[1] = contvec[2] = contvec[3] = contrast;

  /* Both pointers advance by w within a row; these skip the row padding */
  sstep = sstride - w;
  dstep = dstride - w;

  while (h-- > 0) {
    /* The asm loop tests its counter only after the first iteration, so
     * it must not be entered with a count of zero (w < 8). */
    if ((w >> 3) != 0) {
      __asm__ volatile (
        "movq (%5), %%mm3 \n\t"
        "movq (%6), %%mm4 \n\t"
        "pxor %%mm0, %%mm0 \n\t"
        "movl %4, %%eax\n\t"
        ASMALIGN(4)
        "1: \n\t"
        "movq (%0), %%mm1 \n\t"
        "movq (%0), %%mm2 \n\t"
        "punpcklbw %%mm0, %%mm1 \n\t"
        "punpckhbw %%mm0, %%mm2 \n\t"
        "psllw $4, %%mm1 \n\t"
        "psllw $4, %%mm2 \n\t"
        "pmulhw %%mm4, %%mm1 \n\t"
        "pmulhw %%mm4, %%mm2 \n\t"
        "paddw %%mm3, %%mm1 \n\t"
        "paddw %%mm3, %%mm2 \n\t"
        "packuswb %%mm2, %%mm1 \n\t"
        "add $8, %0 \n\t"
        "movq %%mm1, (%1) \n\t"
        "add $8, %1 \n\t"
        "decl %%eax \n\t"
        "jnz 1b \n\t"
        : "=r" (src), "=r" (dst)
        : "0" (src), "1" (dst), "r" (w >> 3), "r" (brvec), "r" (contvec)
        /* "memory": the code reads src/brvec/contvec and writes dst
         * through pointers that are only passed as registers. */
        : "%eax", "memory"
      );
    }

    for (i = w & 7; i > 0; i--) {
      pel = ((*src++ * contrast) >> 12) + brightness;
      /* Saturate to 0..255 for any out-of-range value, like packuswb:
       * negative -> 0, above 255 -> all ones (stored as 255). */
      if (pel & ~255) {
        pel = (-pel) >> 31;
      }
      *dst++ = pel;
    }

    src += sstep;
    dst += dstep;
  }

  __asm__ volatile ( "emms \n\t" ::: "memory" );
}
#endif
177 static
178 void apply_lut (eq2_param_t *par, unsigned char *dst, unsigned char *src,
179 unsigned w, unsigned h, unsigned dstride, unsigned sstride)
181 unsigned i, j, w2;
182 unsigned char *lut;
183 uint16_t *lut16;
185 if (!par->lut_clean) {
186 create_lut (par);
189 lut = par->lut;
190 #ifdef LUT16
191 lut16 = par->lut16;
192 w2= (w>>3)<<2;
193 for (j = 0; j < h; j++) {
194 uint16_t *src16= (uint16_t*)src;
195 uint16_t *dst16= (uint16_t*)dst;
196 for (i = 0; i < w2; i+=4) {
197 dst16[i+0] = lut16[src16[i+0]];
198 dst16[i+1] = lut16[src16[i+1]];
199 dst16[i+2] = lut16[src16[i+2]];
200 dst16[i+3] = lut16[src16[i+3]];
202 i <<= 1;
203 #else
204 w2= (w>>3)<<3;
205 for (j = 0; j < h; j++) {
206 for (i = 0; i < w2; i+=8) {
207 dst[i+0] = lut[src[i+0]];
208 dst[i+1] = lut[src[i+1]];
209 dst[i+2] = lut[src[i+2]];
210 dst[i+3] = lut[src[i+3]];
211 dst[i+4] = lut[src[i+4]];
212 dst[i+5] = lut[src[i+5]];
213 dst[i+6] = lut[src[i+6]];
214 dst[i+7] = lut[src[i+7]];
216 #endif
217 for (; i < w; i++) {
218 dst[i] = lut[src[i]];
221 src += sstride;
222 dst += dstride;
226 static
227 int put_image (vf_instance_t *vf, mp_image_t *src, double pts)
229 unsigned i;
230 vf_eq2_t *eq2;
231 mp_image_t *dst;
232 unsigned long img_n,img_c;
234 eq2 = vf->priv;
236 if ((eq2->buf_w[0] != src->w) || (eq2->buf_h[0] != src->h)) {
237 eq2->buf_w[0] = src->w;
238 eq2->buf_h[0] = src->h;
239 eq2->buf_w[1] = eq2->buf_w[2] = src->w >> src->chroma_x_shift;
240 eq2->buf_h[1] = eq2->buf_h[2] = src->h >> src->chroma_y_shift;
241 img_n = eq2->buf_w[0]*eq2->buf_h[0];
242 if(src->num_planes>1){
243 img_c = eq2->buf_w[1]*eq2->buf_h[1];
244 eq2->buf[0] = (unsigned char *) realloc (eq2->buf[0], img_n + 2*img_c);
245 eq2->buf[1] = eq2->buf[0] + img_n;
246 eq2->buf[2] = eq2->buf[1] + img_c;
247 } else
248 eq2->buf[0] = (unsigned char *) realloc (eq2->buf[0], img_n);
251 dst = vf_get_image (vf->next, src->imgfmt, MP_IMGTYPE_EXPORT, 0, src->w, src->h);
253 for (i = 0; i < ((src->num_planes>1)?3:1); i++) {
254 if (eq2->param[i].adjust != NULL) {
255 dst->planes[i] = eq2->buf[i];
256 dst->stride[i] = eq2->buf_w[i];
258 eq2->param[i].adjust (&eq2->param[i], dst->planes[i], src->planes[i],
259 eq2->buf_w[i], eq2->buf_h[i], dst->stride[i], src->stride[i]);
261 else {
262 dst->planes[i] = src->planes[i];
263 dst->stride[i] = src->stride[i];
267 return vf_next_put_image (vf, dst, pts);
270 static
271 void check_values (eq2_param_t *par)
273 /* yuck! floating point comparisons... */
275 if ((par->c == 1.0) && (par->b == 0.0) && (par->g == 1.0)) {
276 par->adjust = NULL;
278 #if HAVE_MMX
279 else if (par->g == 1.0 && gCpuCaps.hasMMX) {
280 par->adjust = &affine_1d_MMX;
282 #endif
283 else {
284 par->adjust = &apply_lut;
288 static
289 void print_values (vf_eq2_t *eq2)
291 mp_msg (MSGT_VFILTER, MSGL_V, "vf_eq2: c=%.2f b=%.2f g=%.4f s=%.2f \n",
292 eq2->contrast, eq2->brightness, eq2->gamma, eq2->saturation
296 static
297 void set_contrast (vf_eq2_t *eq2, double c)
299 eq2->contrast = c;
300 eq2->param[0].c = c;
301 eq2->param[0].lut_clean = 0;
302 check_values (&eq2->param[0]);
303 print_values (eq2);
306 static
307 void set_brightness (vf_eq2_t *eq2, double b)
309 eq2->brightness = b;
310 eq2->param[0].b = b;
311 eq2->param[0].lut_clean = 0;
312 check_values (&eq2->param[0]);
313 print_values (eq2);
316 static
317 void set_gamma (vf_eq2_t *eq2, double g)
319 eq2->gamma = g;
321 eq2->param[0].g = eq2->gamma * eq2->ggamma;
322 eq2->param[1].g = sqrt (eq2->bgamma / eq2->ggamma);
323 eq2->param[2].g = sqrt (eq2->rgamma / eq2->ggamma);
324 eq2->param[0].w = eq2->param[1].w = eq2->param[2].w = eq2->gamma_weight;
326 eq2->param[0].lut_clean = 0;
327 eq2->param[1].lut_clean = 0;
328 eq2->param[2].lut_clean = 0;
330 check_values (&eq2->param[0]);
331 check_values (&eq2->param[1]);
332 check_values (&eq2->param[2]);
334 print_values (eq2);
337 static
338 void set_saturation (vf_eq2_t *eq2, double s)
340 eq2->saturation = s;
342 eq2->param[1].c = s;
343 eq2->param[2].c = s;
345 eq2->param[1].lut_clean = 0;
346 eq2->param[2].lut_clean = 0;
348 check_values (&eq2->param[1]);
349 check_values (&eq2->param[2]);
351 print_values (eq2);
354 static
355 int control (vf_instance_t *vf, int request, void *data)
357 vf_equalizer_t *eq;
359 switch (request) {
360 case VFCTRL_SET_EQUALIZER:
361 eq = (vf_equalizer_t *) data;
363 if (strcmp (eq->item, "gamma") == 0) {
364 set_gamma (vf->priv, exp (log (8.0) * eq->value / 100.0));
365 return CONTROL_TRUE;
367 else if (strcmp (eq->item, "contrast") == 0) {
368 set_contrast (vf->priv, (1.0 / 100.0) * (eq->value + 100));
369 return CONTROL_TRUE;
371 else if (strcmp (eq->item, "brightness") == 0) {
372 set_brightness (vf->priv, (1.0 / 100.0) * eq->value);
373 return CONTROL_TRUE;
375 else if (strcmp (eq->item, "saturation") == 0) {
376 set_saturation (vf->priv, (double) (eq->value + 100) / 100.0);
377 return CONTROL_TRUE;
379 break;
381 case VFCTRL_GET_EQUALIZER:
382 eq = (vf_equalizer_t *) data;
383 if (strcmp (eq->item, "gamma") == 0) {
384 eq->value = (int) (100.0 * log (vf->priv->gamma) / log (8.0));
385 return CONTROL_TRUE;
387 else if (strcmp (eq->item, "contrast") == 0) {
388 eq->value = (int) (100.0 * vf->priv->contrast) - 100;
389 return CONTROL_TRUE;
391 else if (strcmp (eq->item, "brightness") == 0) {
392 eq->value = (int) (100.0 * vf->priv->brightness);
393 return CONTROL_TRUE;
395 else if (strcmp (eq->item, "saturation") == 0) {
396 eq->value = (int) (100.0 * vf->priv->saturation) - 100;
397 return CONTROL_TRUE;
399 break;
402 return vf_next_control (vf, request, data);
405 static
406 int query_format (vf_instance_t *vf, unsigned fmt)
408 switch (fmt) {
409 case IMGFMT_YVU9:
410 case IMGFMT_IF09:
411 case IMGFMT_YV12:
412 case IMGFMT_I420:
413 case IMGFMT_IYUV:
414 case IMGFMT_Y800:
415 case IMGFMT_Y8:
416 case IMGFMT_444P:
417 case IMGFMT_422P:
418 case IMGFMT_411P:
419 return vf_next_query_format (vf, fmt);
422 return 0;
425 static
426 void uninit (vf_instance_t *vf)
428 if (vf->priv != NULL) {
429 free (vf->priv->buf[0]);
430 free (vf->priv);
434 static
435 int open (vf_instance_t *vf, char *args)
437 unsigned i;
438 vf_eq2_t *eq2;
439 double par[8];
441 vf->control = control;
442 vf->query_format = query_format;
443 vf->put_image = put_image;
444 vf->uninit = uninit;
446 vf->priv = (vf_eq2_t *) malloc (sizeof (vf_eq2_t));
447 eq2 = vf->priv;
449 for (i = 0; i < 3; i++) {
450 eq2->buf[i] = NULL;
451 eq2->buf_w[i] = 0;
452 eq2->buf_h[i] = 0;
454 eq2->param[i].adjust = NULL;
455 eq2->param[i].c = 1.0;
456 eq2->param[i].b = 0.0;
457 eq2->param[i].g = 1.0;
458 eq2->param[i].lut_clean = 0;
461 eq2->contrast = 1.0;
462 eq2->brightness = 0.0;
463 eq2->saturation = 1.0;
465 eq2->gamma = 1.0;
466 eq2->gamma_weight = 1.0;
467 eq2->rgamma = 1.0;
468 eq2->ggamma = 1.0;
469 eq2->bgamma = 1.0;
471 if (args != NULL) {
472 par[0] = 1.0;
473 par[1] = 1.0;
474 par[2] = 0.0;
475 par[3] = 1.0;
476 par[4] = 1.0;
477 par[5] = 1.0;
478 par[6] = 1.0;
479 par[7] = 1.0;
480 sscanf (args, "%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf",
481 par, par + 1, par + 2, par + 3, par + 4, par + 5, par + 6, par + 7
484 eq2->rgamma = par[4];
485 eq2->ggamma = par[5];
486 eq2->bgamma = par[6];
487 eq2->gamma_weight = par[7];
489 set_gamma (eq2, par[0]);
490 set_contrast (eq2, par[1]);
491 set_brightness (eq2, par[2]);
492 set_saturation (eq2, par[3]);
495 return 1;
498 const vf_info_t vf_info_eq2 = {
499 "Software equalizer",
500 "eq2",
501 "Hampa Hug, Daniel Moreno, Richard Felker",
503 &open,
504 NULL