20% faster hqdn3d on x86_64
[mplayer/glamo.git] / libmpcodecs / vf_eq.c
blobe85deeacffa79aabde08c8203bfbdfc8568d63a2
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <inttypes.h>
6 #include "config.h"
7 #include "mp_msg.h"
8 #include "cpudetect.h"
10 #include "img_format.h"
11 #include "mp_image.h"
12 #include "vf.h"
14 #include "libvo/video_out.h"
16 #include "m_option.h"
17 #include "m_struct.h"
/*
 * Per-instance state of the equalizer filter, together with the default
 * instance used by the option parser (see vf_opts).
 */
static struct vf_priv_s {
    unsigned char *buf;   /* scratch plane 0 written by process(); allocated lazily in put_image() */
    int brightness;       /* user brightness setting; option range is -100..100 */
    int contrast;         /* user contrast setting; option range is -100..100 */
} const vf_priv_dflt = {
    NULL,
    0,
    0
};
#if HAVE_MMX
/*
 * MMX version of the brightness/contrast filter for one 8-bit plane.
 *
 * dest/dstride: output plane and its row pitch in bytes
 * src/sstride:  input plane and its row pitch in bytes
 * w, h:         plane size in pixels
 * brightness, contrast: user settings (the option table limits them to
 *               -100..100)
 *
 * Each row is handled in groups of 8 pixels by the inline assembly; the
 * remaining w&7 pixels are handled by a scalar loop.
 */
static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, int sstride,
                        int w, int h, int brightness, int contrast)
{
    int i;
    int pel;
    int dstep = dstride-w;
    int sstep = sstride-w;
    const int blocks = w>>3;   /* number of full 8-pixel groups per row */
    short brvec[4];
    short contvec[4];

    /* 4.12 fixed-point gain (contrast 0 -> 4096) */
    contrast = ((contrast+100)*256*16)/100;
    /* contrast/32 is 128 times the gain, so the gain pivots about mid-grey */
    brightness = ((brightness+100)*511)/200-128 - contrast/32;

    brvec[0] = brvec[1] = brvec[2] = brvec[3] = brightness;
    contvec[0] = contvec[1] = contvec[2] = contvec[3] = contrast;

    while (h--) {
        /*
         * The assembly loop tests its counter only after the first
         * iteration, so it must not be entered with a count of zero
         * (that would wrap the counter and run far past the row).
         */
        if (blocks > 0) {
            __asm__ volatile (
                "movq (%5), %%mm3 \n\t"        /* mm3 = brightness x4 */
                "movq (%6), %%mm4 \n\t"        /* mm4 = contrast x4 */
                "pxor %%mm0, %%mm0 \n\t"       /* mm0 = 0, used for unpacking */
                "movl %4, %%eax\n\t"           /* eax = group counter */
                ASMALIGN(4)
                "1: \n\t"
                "movq (%0), %%mm1 \n\t"
                "movq (%0), %%mm2 \n\t"
                "punpcklbw %%mm0, %%mm1 \n\t"  /* low 4 pixels -> words */
                "punpckhbw %%mm0, %%mm2 \n\t"  /* high 4 pixels -> words */
                "psllw $4, %%mm1 \n\t"
                "psllw $4, %%mm2 \n\t"
                "pmulhw %%mm4, %%mm1 \n\t"     /* ((pixel<<4)*contrast)>>16 */
                "pmulhw %%mm4, %%mm2 \n\t"
                "paddw %%mm3, %%mm1 \n\t"
                "paddw %%mm3, %%mm2 \n\t"
                "packuswb %%mm2, %%mm1 \n\t"   /* saturate back to 8 bytes */
                "add $8, %0 \n\t"
                "movq %%mm1, (%1) \n\t"
                "add $8, %1 \n\t"
                "decl %%eax \n\t"
                "jnz 1b \n\t"
                : "=r" (src), "=r" (dest)
                : "0" (src), "1" (dest), "r" (blocks), "r" (brvec), "r" (contvec)
                /* "memory": the asm reads brvec/contvec/src and writes dest
                 * through pointers the compiler cannot see into */
                : "%eax", "memory"
            );
        }

        /* scalar tail for the last w&7 pixels of the row */
        for (i = w&7; i; i--)
        {
            pel = ((*src++* contrast)>>12) + brightness;
            if(pel&768) pel = (-pel)>>31;
            *dest++ = pel;
        }

        src += sstep;
        dest += dstep;
    }
    /* leave MMX state so that later floating-point code works */
    __asm__ volatile ( "emms \n\t" ::: "memory" );
}
#endif
/*
 * Portable brightness/contrast filter for one 8-bit plane.
 *
 * dest/dstride: output plane and its row pitch in bytes
 * src/sstride:  input plane and its row pitch in bytes
 * w, h:         plane size in pixels; nothing is written when either is <= 0
 * brightness, contrast: user settings (the option table limits them to
 *               -100..100, but other callers may pass anything)
 *
 * Every output pixel is gain*src + offset, clamped to 0..255.
 */
static void process_C(unsigned char *dest, int dstride, unsigned char *src, int sstride,
                      int w, int h, int brightness, int contrast)
{
    int i;
    int pel;
    int dstep = dstride-w;
    int sstep = sstride-w;

    /* 16.16 fixed-point gain (contrast 0 -> 65536) */
    contrast = ((contrast+100)*256*256)/100;
    /* contrast/512 is 128 times the gain, so the gain pivots about mid-grey */
    brightness = ((brightness+100)*511)/200-128 - contrast/512;

    while (h-- > 0) {
        for (i = 0; i < w; i++)
        {
            pel = ((*src++* contrast)>>16) + brightness;
            /*
             * Explicit clamp: a bit-mask test only catches overflow within
             * a limited range and shifting a negative int right is
             * implementation-defined.
             */
            if (pel < 0) pel = 0;
            else if (pel > 255) pel = 255;
            *dest++ = pel;
        }
        src += sstep;
        dest += dstep;
    }
}
/*
 * Plane filter used by put_image(). open() points it at process_C, or at
 * process_MMX when MMX support is compiled in and detected at run time.
 */
static void (*process)(unsigned char *dest, int dstride, unsigned char *src, int sstride,
                       int w, int h, int brightness, int contrast);

/* FIXME: add packed yuv version of process */
117 static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts)
119 mp_image_t *dmpi;
121 dmpi=vf_get_image(vf->next, mpi->imgfmt,
122 MP_IMGTYPE_EXPORT, 0,
123 mpi->w, mpi->h);
125 dmpi->stride[0] = mpi->stride[0];
126 dmpi->planes[1] = mpi->planes[1];
127 dmpi->planes[2] = mpi->planes[2];
128 dmpi->stride[1] = mpi->stride[1];
129 dmpi->stride[2] = mpi->stride[2];
131 if (!vf->priv->buf) vf->priv->buf = malloc(mpi->stride[0]*mpi->h);
133 if ((vf->priv->brightness == 0) && (vf->priv->contrast == 0))
134 dmpi->planes[0] = mpi->planes[0];
135 else {
136 dmpi->planes[0] = vf->priv->buf;
137 process(dmpi->planes[0], dmpi->stride[0],
138 mpi->planes[0], mpi->stride[0],
139 mpi->w, mpi->h, vf->priv->brightness,
140 vf->priv->contrast);
143 return vf_next_put_image(vf,dmpi, pts);
146 static int control(struct vf_instance_s* vf, int request, void* data)
148 vf_equalizer_t *eq;
150 switch (request) {
151 case VFCTRL_SET_EQUALIZER:
152 eq = data;
153 if (!strcmp(eq->item,"brightness")) {
154 vf->priv->brightness = eq->value;
155 return CONTROL_TRUE;
157 else if (!strcmp(eq->item,"contrast")) {
158 vf->priv->contrast = eq->value;
159 return CONTROL_TRUE;
161 break;
162 case VFCTRL_GET_EQUALIZER:
163 eq = data;
164 if (!strcmp(eq->item,"brightness")) {
165 eq->value = vf->priv->brightness;
166 return CONTROL_TRUE;
168 else if (!strcmp(eq->item,"contrast")) {
169 eq->value = vf->priv->contrast;
170 return CONTROL_TRUE;
172 break;
174 return vf_next_control(vf, request, data);
177 static int query_format(struct vf_instance_s* vf, unsigned int fmt)
179 switch (fmt) {
180 case IMGFMT_YVU9:
181 case IMGFMT_IF09:
182 case IMGFMT_YV12:
183 case IMGFMT_I420:
184 case IMGFMT_IYUV:
185 case IMGFMT_CLPL:
186 case IMGFMT_Y800:
187 case IMGFMT_Y8:
188 case IMGFMT_NV12:
189 case IMGFMT_NV21:
190 case IMGFMT_444P:
191 case IMGFMT_422P:
192 case IMGFMT_411P:
193 return vf_next_query_format(vf, fmt);
195 return 0;
198 static void uninit(struct vf_instance_s* vf)
200 if (vf->priv->buf) free(vf->priv->buf);
201 free(vf->priv);
204 static int open(vf_instance_t *vf, char* args)
206 vf->control=control;
207 vf->query_format=query_format;
208 vf->put_image=put_image;
209 vf->uninit=uninit;
211 if(!vf->priv) {
212 vf->priv = malloc(sizeof(struct vf_priv_s));
213 memset(vf->priv, 0, sizeof(struct vf_priv_s));
215 if (args) sscanf(args, "%d:%d", &vf->priv->brightness, &vf->priv->contrast);
217 process = process_C;
218 #if HAVE_MMX
219 if(gCpuCaps.hasMMX) process = process_MMX;
220 #endif
222 return 1;
225 #define ST_OFF(f) M_ST_OFF(struct vf_priv_s,f)
226 static m_option_t vf_opts_fields[] = {
227 {"brightness", ST_OFF(brightness), CONF_TYPE_INT, M_OPT_RANGE,-100 ,100, NULL},
228 {"contrast", ST_OFF(contrast), CONF_TYPE_INT, M_OPT_RANGE,-100 ,100, NULL},
229 { NULL, NULL, 0, 0, 0, 0, NULL }
232 static m_struct_t vf_opts = {
233 "eq",
234 sizeof(struct vf_priv_s),
235 &vf_priv_dflt,
236 vf_opts_fields
239 const vf_info_t vf_info_eq = {
240 "soft video equalizer",
241 "eq",
242 "Richard Felker",
244 open,
245 &vf_opts