Fix for SIMD YUV444 to RGB conversion
[liboggplay.git] / src / liboggplay / oggplay_yuv2rgb.c
blobe4ac3fc075027127a64a490e6ab3948f831b0b76
1 /*
2 Copyright (C) 2003 Commonwealth Scientific and Industrial Research
3 Organisation (CSIRO) Australia
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 - Neither the name of CSIRO Australia nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ORGANISATION OR
24 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 * yuv2rgb.c
36 * YUV->RGB function, using platform-specific optimisations where possible.
38 * Shane Stephens <shane.stephens@annodex.net>
39 * Michael Martin
40 * Marcin Lubonski
41 * Viktor Gal
42 * Makoto Kato <m_kato@ga2.so-net.ne.jp>
45 #include "oggplay_private.h"
46 #include "oggplay_yuv2rgb_template.h"
48 #ifdef __SUNPRO_C
49 #define DISABLE_CPU_FEATURES
50 /* gcc inline asm and intrinsics have problems with Sun Studio.
51 * We need to fix it.
53 #else
54 /* cpu extension detection */
55 #include "cpu.c"
56 #endif
58 /**
59 * yuv_convert_fptr type is a function pointer type for
60 * the various yuv-rgb converters
62 typedef void (*yuv_convert_fptr) (const OggPlayYUVChannels *yuv,
63 OggPlayRGBChannels *rgb);
65 /* it is useless to determine each YUV conversion run
66 * the cpu type/featurs, thus we save the conversion function
67 * pointers
69 static struct OggPlayYUVConverters {
70 yuv_convert_fptr yuv420rgba; /**< YUV420 to RGBA */
71 yuv_convert_fptr yuv420bgra; /**< YUV420 to BGRA */
72 yuv_convert_fptr yuv420argb; /**< YUV420 to ARGB */
73 yuv_convert_fptr yuv422rgba; /**< YUV422 to RGBA */
74 yuv_convert_fptr yuv422bgra; /**< YUV422 to BGRA */
75 yuv_convert_fptr yuv422argb; /**< YUV422 to ARGB */
76 yuv_convert_fptr yuv444rgba; /**< YUV444 to RGBA */
77 yuv_convert_fptr yuv444bgra; /**< YUV444 to BGRA */
78 yuv_convert_fptr yuv444argb; /**< YUV444 to ARGB */
79 } yuv_conv = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
/**
 * Vanilla (plain C) implementation of YUV-to-RGB conversion.
 *
 * - Multiplications are replaced by table look-ups: every possible
 *   8-bit sample value has its pre-multiplied contribution stored in
 *   one of the Coefs* tables below.
 * - All arithmetic is fixed point with `prec` fractional bits; the
 *   conversion factors are scaled by 2^prec and rounded to nearest.
 *   (The factors look like the usual video-range BT.601 ones.)
 * - Final channel values are saturated to 0..255 with CLAMP().
 */

/* number of fractional bits used by the fixed-point arithmetic */
#define prec 15

/* conversion factors, scaled by 2^prec and rounded to nearest */
static const int CoY  = (int)(1.164 * (1 << prec) + 0.5);
static const int CoRV = (int)(1.596 * (1 << prec) + 0.5);
static const int CoGU = (int)(0.391 * (1 << prec) + 0.5);
static const int CoGV = (int)(0.813 * (1 << prec) + 0.5);
static const int CoBU = (int)(2.018 * (1 << prec) + 0.5);

/* per-sample-value lookup tables, filled in by init_vanilla_coeffs() */
static int CoefsGU[256] = {0};
static int CoefsGV[256] = {0};
static int CoefsBU[256] = {0};
static int CoefsRV[256] = {0};
static int CoefsY[256]  = {0};

/* saturate v to the 0..255 range; note that v is evaluated more than once */
#define CLAMP(v) ((v) < 0 ? 0 : ((v) > 255 ? 255 : (v)))
/*
 * Compute the (still unclamped) r, g and b values for one luma sample.
 * The luma contribution is looked up in CoefsY, the chroma
 * contributions are passed in, and the sum is scaled back down from
 * fixed point.  The variables r, g and b must already be declared in
 * the expanding scope (LOOKUP_COEFFS does that).
 *
 * These macros expand to plain statement sequences on purpose: they are
 * chained inside other macros without separators, so they must not be
 * wrapped in do { } while (0).
 */
#define VANILLA_YUV2RGB_PIXEL(luma, r_uv, g_uv, b_uv) \
  r = (CoefsY[(luma)] + (r_uv)) >> prec;              \
  g = (CoefsY[(luma)] + (g_uv)) >> prec;              \
  b = (CoefsY[(luma)] + (b_uv)) >> prec;

/* Store one pixel as the bytes R, G, B, A with alpha fixed at 255. */
#define VANILLA_RGBA_OUT(out, red, green, blue) \
  (out)[0] = CLAMP(red);                        \
  (out)[1] = CLAMP(green);                      \
  (out)[2] = CLAMP(blue);                       \
  (out)[3] = 255;

/* Store one pixel as the bytes B, G, R, A with alpha fixed at 255. */
#define VANILLA_BGRA_OUT(out, red, green, blue) \
  (out)[0] = CLAMP(blue);                       \
  (out)[1] = CLAMP(green);                      \
  (out)[2] = CLAMP(red);                        \
  (out)[3] = 255;

/* Store one pixel as the bytes A, R, G, B with alpha fixed at 255. */
#define VANILLA_ARGB_OUT(out, red, green, blue) \
  (out)[0] = 255;                               \
  (out)[1] = CLAMP(red);                        \
  (out)[2] = CLAMP(green);                      \
  (out)[3] = CLAMP(blue);

/* Store one pixel as the bytes A, B, G, R with alpha fixed at 255. */
#define VANILLA_ABGR_OUT(out, red, green, blue) \
  (out)[0] = 255;                               \
  (out)[1] = CLAMP(blue);                       \
  (out)[2] = CLAMP(green);                      \
  (out)[3] = CLAMP(red);

/*
 * Declare r, g, b and fetch the chroma contributions for the samples
 * currently addressed by pu and pv.  pu and pv are supplied by the
 * scope in which the macro is expanded.
 */
#define LOOKUP_COEFFS                     \
  int ruv = CoefsRV[*pv];                 \
  int guv = CoefsGU[*pu] + CoefsGV[*pv];  \
  int buv = CoefsBU[*pu];                 \
  int r, g, b;
138 /* yuv420p, yuv422p -> */
139 #define CONVERT(OUTPUT_FUNC) LOOKUP_COEFFS \
140 VANILLA_YUV2RGB_PIXEL(py[0], ruv, guv, buv) \
141 OUTPUT_FUNC(dst, r, g, b) \
142 VANILLA_YUV2RGB_PIXEL(py[1], ruv, guv, buv) \
143 OUTPUT_FUNC((dst+4), r, g, b)
145 #define CLEANUP
147 YUV_CONVERT(yuv420_to_rgba_vanilla, CONVERT(VANILLA_RGBA_OUT), VANILLA_RGBA_OUT, 2, 8, 2, 1, 2)
148 YUV_CONVERT(yuv420_to_bgra_vanilla, CONVERT(VANILLA_BGRA_OUT), VANILLA_BGRA_OUT, 2, 8, 2, 1, 2)
149 YUV_CONVERT(yuv420_to_abgr_vanilla, CONVERT(VANILLA_ABGR_OUT), VANILLA_ABGR_OUT, 2, 8, 2, 1, 2)
150 YUV_CONVERT(yuv420_to_argb_vanilla, CONVERT(VANILLA_ARGB_OUT), VANILLA_ARGB_OUT, 2, 8, 2, 1, 2)
152 YUV_CONVERT(yuv422_to_rgba_vanilla, CONVERT(VANILLA_RGBA_OUT), VANILLA_RGBA_OUT, 2, 8, 2, 1, 1)
153 YUV_CONVERT(yuv422_to_bgra_vanilla, CONVERT(VANILLA_BGRA_OUT), VANILLA_BGRA_OUT, 2, 8, 2, 1, 1)
154 YUV_CONVERT(yuv422_to_abgr_vanilla, CONVERT(VANILLA_ABGR_OUT), VANILLA_ABGR_OUT, 2, 8, 2, 1, 1)
155 YUV_CONVERT(yuv422_to_argb_vanilla, CONVERT(VANILLA_ARGB_OUT), VANILLA_ARGB_OUT, 2, 8, 2, 1, 1)
157 #undef CONVERT
159 /* yuv444p -> */
160 #define CONVERT(OUTPUT_FUNC) LOOKUP_COEFFS \
161 VANILLA_YUV2RGB_PIXEL(py[0], ruv, guv, buv) \
162 OUTPUT_FUNC(dst, r, g, b)
164 YUV_CONVERT(yuv444_to_rgba_vanilla, CONVERT(VANILLA_RGBA_OUT), VANILLA_RGBA_OUT, 1, 4, 1, 1, 1)
165 YUV_CONVERT(yuv444_to_bgra_vanilla, CONVERT(VANILLA_BGRA_OUT), VANILLA_BGRA_OUT, 1, 4, 1, 1, 1)
166 YUV_CONVERT(yuv444_to_abgr_vanilla, CONVERT(VANILLA_ABGR_OUT), VANILLA_ABGR_OUT, 1, 4, 1, 1, 1)
167 YUV_CONVERT(yuv444_to_argb_vanilla, CONVERT(VANILLA_ARGB_OUT), VANILLA_ARGB_OUT, 1, 4, 1, 1, 1)
169 #undef CONVERT
170 #undef CLEANUP
172 #ifndef DISABLE_CPU_FEATURES
173 /* although we use cpu runtime detection, we still need these
174 * macros as there's no way e.g. we could compile a x86 asm code
175 * on a ppc machine and vica-versa
177 #if defined(i386) || defined(__x86__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_AMD64)
178 #if !defined(_M_AMD64)
179 #define ENABLE_MMX
180 #endif
181 #include "x86/oggplay_yuv2rgb_x86.c"
182 #if defined(ATTRIBUTE_ALIGNED_MAX) && ATTRIBUTE_ALIGNED_MAX >= 16
183 #define ENABLE_SSE2
184 #endif
185 #elif defined(__ppc__) || defined(__ppc64__)
186 #define ENABLE_ALTIVEC
187 //altivec intristics only working with -maltivec gcc flag,
188 //but we want runtime altivec detection, hence this has to be
189 //fixed!
190 //#include "oggplay_yuv2rgb_altivec.c"
191 #endif
192 #endif
196 * Initialize the lookup-table for vanilla yuv to rgb conversion.
198 static void
199 init_vanilla_coeffs (void)
201 int i;
203 for(i = 0; i < 256; ++i)
205 CoefsGU[i] = -CoGU * (i - 128);
206 CoefsGV[i] = -CoGV * (i - 128);
207 CoefsBU[i] = CoBU * (i - 128);
208 CoefsRV[i] = CoRV * (i - 128);
209 CoefsY[i] = CoY * (i - 16) + (prec/2);
214 * Initialize the function pointers in yuv_conv.
216 * Initialize the function pointers in yuv_conv, based on the
217 * the available CPU extensions.
219 static void
220 init_yuv_converters(void)
222 ogg_uint32_t features = 0;
224 if ( yuv_conv.yuv420rgba == NULL )
226 init_vanilla_coeffs ();
227 #ifndef DISABLE_CPU_FEATURES
228 features = oc_cpu_flags_get();
229 #endif
230 #ifdef ENABLE_SSE2
231 if (features & OC_CPU_X86_SSE2)
233 yuv_conv.yuv420rgba = yuv420_to_rgba_sse2;
234 yuv_conv.yuv420bgra = yuv420_to_bgra_sse2;
235 yuv_conv.yuv420argb = yuv420_to_argb_sse2;
236 yuv_conv.yuv422rgba = yuv422_to_rgba_sse2;
237 yuv_conv.yuv422bgra = yuv422_to_bgra_sse2;
238 yuv_conv.yuv422argb = yuv422_to_argb_sse2;
239 yuv_conv.yuv444rgba = yuv444_to_rgba_sse2;
240 yuv_conv.yuv444bgra = yuv444_to_bgra_sse2;
241 yuv_conv.yuv444argb = yuv444_to_argb_sse2;
242 return;
244 #endif /* SSE2 */
245 #ifdef ENABLE_MMX
246 #ifdef ENABLE_SSE2
247 else
248 #endif
249 if (features & OC_CPU_X86_MMXEXT)
251 yuv_conv.yuv420rgba = yuv420_to_rgba_sse;
252 yuv_conv.yuv420bgra = yuv420_to_bgra_sse;
253 yuv_conv.yuv420argb = yuv420_to_argb_sse;
254 yuv_conv.yuv422rgba = yuv422_to_rgba_sse;
255 yuv_conv.yuv422bgra = yuv422_to_bgra_sse;
256 yuv_conv.yuv422argb = yuv422_to_argb_sse;
257 yuv_conv.yuv444rgba = yuv444_to_rgba_sse;
258 yuv_conv.yuv444bgra = yuv444_to_bgra_sse;
259 yuv_conv.yuv444argb = yuv444_to_argb_sse;
260 return;
262 else if (features & OC_CPU_X86_MMX)
264 yuv_conv.yuv420rgba = yuv420_to_rgba_mmx;
265 yuv_conv.yuv420bgra = yuv420_to_bgra_mmx;
266 yuv_conv.yuv420argb = yuv420_to_argb_mmx;
267 yuv_conv.yuv422rgba = yuv422_to_rgba_mmx;
268 yuv_conv.yuv422bgra = yuv422_to_bgra_mmx;
269 yuv_conv.yuv422argb = yuv422_to_argb_mmx;
270 yuv_conv.yuv444rgba = yuv444_to_rgba_mmx;
271 yuv_conv.yuv444bgra = yuv444_to_bgra_mmx;
272 yuv_conv.yuv444argb = yuv444_to_argb_mmx;
273 return;
275 #elif defined(ENABLE_ALTIVEC)
276 if (features & OC_CPU_PPC_ALTIVEC)
278 yuv_conv.yuv420rgba = yuv420_to_abgr_vanilla;
279 yuv_conv.yuv420bgra = yuv420_to_argb_vanilla;
280 yuv_conv.yuv420argb = yuv420_to_bgra_vanilla;
281 yuv_conv.yuv422rgba = yuv422_to_abgr_vanilla;
282 yuv_conv.yuv422bgra = yuv422_to_argb_vanilla;
283 yuv_conv.yuv422argb = yuv422_to_bgra_vanilla;
284 yuv_conv.yuv444rgba = yuv444_to_abgr_vanilla;
285 yuv_conv.yuv444bgra = yuv444_to_argb_vanilla;
286 yuv_conv.yuv444argb = yuv444_to_bgra_vanilla;
287 return;
289 #endif
292 * no CPU extension was found... using vanilla converter, with respect
293 * to the endianness of the host
295 #if WORDS_BIGENDIAN || IS_BIG_ENDIAN
296 yuv_conv.yuv420rgba = yuv420_to_abgr_vanilla;
297 yuv_conv.yuv420bgra = yuv420_to_argb_vanilla;
298 yuv_conv.yuv420argb = yuv420_to_bgra_vanilla;
299 yuv_conv.yuv422rgba = yuv422_to_abgr_vanilla;
300 yuv_conv.yuv422bgra = yuv422_to_argb_vanilla;
301 yuv_conv.yuv422argb = yuv422_to_bgra_vanilla;
302 yuv_conv.yuv444rgba = yuv444_to_abgr_vanilla;
303 yuv_conv.yuv444bgra = yuv444_to_argb_vanilla;
304 yuv_conv.yuv444argb = yuv444_to_bgra_vanilla;
305 #else
306 yuv_conv.yuv420rgba = yuv420_to_rgba_vanilla;
307 yuv_conv.yuv420bgra = yuv420_to_bgra_vanilla;
308 yuv_conv.yuv420argb = yuv420_to_argb_vanilla;
309 yuv_conv.yuv422rgba = yuv422_to_rgba_vanilla;
310 yuv_conv.yuv422bgra = yuv422_to_bgra_vanilla;
311 yuv_conv.yuv422argb = yuv422_to_argb_vanilla;
312 yuv_conv.yuv444rgba = yuv444_to_rgba_vanilla;
313 yuv_conv.yuv444bgra = yuv444_to_bgra_vanilla;
314 yuv_conv.yuv444argb = yuv444_to_argb_vanilla;
315 #endif
320 void
321 oggplay_yuv2rgba(const OggPlayYUVChannels* yuv, OggPlayRGBChannels* rgb)
323 if (yuv_conv.yuv420rgba == NULL)
324 init_yuv_converters();
326 if (yuv->y_height!=yuv->uv_height)
327 yuv_conv.yuv420rgba(yuv, rgb);
328 else if (yuv->y_width!=yuv->uv_width)
329 yuv_conv.yuv422rgba(yuv,rgb);
330 else
331 yuv_conv.yuv444rgba(yuv,rgb);
334 void
335 oggplay_yuv2bgra(const OggPlayYUVChannels* yuv, OggPlayRGBChannels * rgb)
337 if (yuv_conv.yuv420bgra == NULL)
338 init_yuv_converters();
340 if (yuv->y_height!=yuv->uv_height)
341 yuv_conv.yuv420bgra(yuv, rgb);
342 else if (yuv->y_width!=yuv->uv_width)
343 yuv_conv.yuv422bgra(yuv,rgb);
344 else
345 yuv_conv.yuv444bgra(yuv,rgb);
348 void
349 oggplay_yuv2argb(const OggPlayYUVChannels* yuv, OggPlayRGBChannels * rgb)
351 if (yuv_conv.yuv420argb == NULL)
352 init_yuv_converters();
354 if (yuv->y_height!=yuv->uv_height)
355 yuv_conv.yuv420argb(yuv, rgb);
356 else if (yuv->y_width!=yuv->uv_width)
357 yuv_conv.yuv422argb(yuv,rgb);
358 else
359 yuv_conv.yuv444argb(yuv,rgb);