Fix r28222, including alloca.h directly might break compilation.
[mplayer/glamo.git] / libmpeg2 / motion_comp_alpha.c
blob05cd550841aad25bfadd7a9ded59d728049c9a39
1 /*
2 * motion_comp_alpha.c
3 * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org>
5 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
6 * See http://libmpeg2.sourceforge.net/ for updates.
8 * mpeg2dec is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * mpeg2dec is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
23 #include "config.h"
25 #ifdef ARCH_ALPHA
27 #include <inttypes.h>
29 #include "mpeg2.h"
30 #include "attributes.h"
31 #include "mpeg2_internal.h"
32 #include "alpha_asm.h"
// Byte-wise average of the eight bytes packed in a and b, with halves
// rounded up: per byte lane this evaluates (a | b) - ((a ^ b) >> 1), which
// equals (a + b + 1) >> 1.
// The 0xfe mask drops the low bit of every lane before the shift so that no
// bit crosses into the neighbouring lane.
// NOTE(review): BYTE_VEC is not defined in this file; presumably it
// replicates its argument into every byte of a quadword (alpha_asm.h).
34 static inline uint64_t avg2 (uint64_t a, uint64_t b)
35 {
36 return (a | b) - (((a ^ b) & BYTE_VEC (0xfe)) >> 1);
37 }
39 // Load two unaligned quadwords from addr. This macro only works if
40 // addr is actually unaligned.
// The result is written to ret_l (first quadword) and ret_r (second one).
// Three ldq_u loads are issued at addr + 0, addr + 8 and addr + 16, and each
// neighbouring pair is merged with extql/extqh.
// NOTE(review): ldq_u, extql and extqh are not defined in this file;
// presumably they are the Alpha helpers from alpha_asm.h -- confirm there.
// NOTE(review): addr is expanded seven times and is not parenthesized, so
// pass a plain pointer expression without side effects.
41 #define ULOAD16(ret_l,ret_r,addr) \
42 do { \
43 uint64_t _l = ldq_u (addr + 0); \
44 uint64_t _m = ldq_u (addr + 8); \
45 uint64_t _r = ldq_u (addr + 16); \
46 ret_l = extql (_l, addr) | extqh (_m, addr); \
47 ret_r = extql (_m, addr) | extqh (_r, addr); \
48 } while (0)
50 // Load two aligned quadwords from addr.
// ret_l receives ldq (addr) and ret_r receives ldq (addr + 8).
// NOTE(review): ldq is not defined in this file; presumably the aligned
// quadword load from alpha_asm.h -- confirm there.
51 #define ALOAD16(ret_l,ret_r,addr) \
52 do { \
53 ret_l = ldq (addr); \
54 ret_r = ldq (addr + 8); \
55 } while (0)
// Row loop for one quadword per row with no interpolation: each pass hands
// LOAD (pixels) to STORE for block, then advances both pointers by
// line_size.
// Uses the names pixels, block, line_size and h from the expanding scope.
// LOAD16 is accepted but unused, so every OP* macro takes the same
// arguments.
// The body runs before --h is tested, so h must be at least 1 on entry.
57 #define OP8(LOAD,LOAD16,STORE) \
58 do { \
59 STORE (LOAD (pixels), block); \
60 pixels += line_size; \
61 block += line_size; \
62 } while (--h)
// Row loop for two quadwords per row with no interpolation: LOAD16 fetches
// the pair from pixels, which is then passed to STORE for block and
// block + 8; both pointers advance by line_size afterwards.
// Uses pixels, block, line_size and h from the expanding scope; LOAD is
// accepted but unused.
// The body runs before --h is tested, so h must be at least 1 on entry.
64 #define OP16(LOAD,LOAD16,STORE) \
65 do { \
66 uint64_t l, r; \
67 LOAD16 (l, r, pixels); \
68 STORE (l, block); \
69 STORE (r, block + 8); \
70 pixels += line_size; \
71 block += line_size; \
72 } while (--h)
// Row loop, one quadword per row, averaging each row with a copy of itself
// shifted by one byte.
// p0 is the quadword at pixels; p1 is p0 shifted right by 8 bits with
// pixels[8] placed in the top byte. With little-endian quadword loads that
// is the eight bytes starting at pixels + 1 -- NOTE(review): byte order is
// assumed, confirm for the target.
// avg2 (p0, p1) goes to STORE for block, so nine source bytes are read per
// row.
// Uses pixels, block, line_size and h from the expanding scope; LOAD16 is
// unused. h must be at least 1 on entry.
74 #define OP8_X2(LOAD,LOAD16,STORE) \
75 do { \
76 uint64_t p0, p1; \
78 p0 = LOAD (pixels); \
79 p1 = p0 >> 8 | ((uint64_t) pixels[8] << 56); \
80 STORE (avg2 (p0, p1), block); \
81 pixels += line_size; \
82 block += line_size; \
83 } while (--h)
// Two-quadword variant of the one-byte-shift average.
// LOAD16 fetches p0 and p1 from pixels. The shifted copy of p0 takes its
// top byte from the low byte of p1 (p1 << 56), and the shifted copy of p1
// takes its top byte from pixels[16], so 17 source bytes are read per row.
// The two averaged quadwords go to STORE for block and block + 8.
// Uses pixels, block, line_size and h from the expanding scope; LOAD is
// unused. h must be at least 1 on entry.
85 #define OP16_X2(LOAD,LOAD16,STORE) \
86 do { \
87 uint64_t p0, p1; \
89 LOAD16 (p0, p1, pixels); \
90 STORE (avg2(p0, p0 >> 8 | p1 << 56), block); \
91 STORE (avg2(p1, p1 >> 8 | (uint64_t) pixels[16] << 56), \
92 block + 8); \
93 pixels += line_size; \
94 block += line_size; \
95 } while (--h)
// Row loop, one quadword per row, averaging each row with the row
// line_size bytes further on.
// p0 and p1 are primed with the first two rows; each pass computes
// avg2 (p0, p1), fetches the following row into p1 and only then stores
// the average to block.
// When --h reaches 0, line_size is overwritten with 0, so the last pass
// re-reads the current row instead of stepping past it; h + 1 source rows
// are touched in total.
// NOTE: this clobbers the line_size variable of the expanding scope.
// Uses pixels, block, line_size and h from the expanding scope; LOAD16 is
// unused. h must be at least 1 on entry.
97 #define OP8_Y2(LOAD,LOAD16,STORE) \
98 do { \
99 uint64_t p0, p1; \
100 p0 = LOAD (pixels); \
101 pixels += line_size; \
102 p1 = LOAD (pixels); \
103 do { \
104 uint64_t av = avg2 (p0, p1); \
105 if (--h == 0) line_size = 0; \
106 pixels += line_size; \
107 p0 = p1; \
108 p1 = LOAD (pixels); \
109 STORE (av, block); \
110 block += line_size; \
111 } while (h); \
112 } while (0)
// Two-quadword variant of the row-with-next-row average.
// (p0l, p0r) and (p1l, p1r) are primed with the first two rows via LOAD16;
// each pass averages the left and right halves with avg2, fetches the
// following row, then stores the averages to block and block + 8.
// When --h reaches 0, line_size is overwritten with 0, so the last pass
// re-reads the current row instead of stepping past it; h + 1 source rows
// are touched in total.
// NOTE: this clobbers the line_size variable of the expanding scope.
// Uses pixels, block, line_size and h from the expanding scope; LOAD is
// unused. h must be at least 1 on entry.
114 #define OP16_Y2(LOAD,LOAD16,STORE) \
115 do { \
116 uint64_t p0l, p0r, p1l, p1r; \
117 LOAD16 (p0l, p0r, pixels); \
118 pixels += line_size; \
119 LOAD16 (p1l, p1r, pixels); \
120 do { \
121 uint64_t avl, avr; \
122 if (--h == 0) line_size = 0; \
123 avl = avg2 (p0l, p1l); \
124 avr = avg2 (p0r, p1r); \
125 p0l = p1l; \
126 p0r = p1r; \
127 pixels += line_size; \
128 LOAD16 (p1l, p1r, pixels); \
129 STORE (avl, block); \
130 STORE (avr, block + 8); \
131 block += line_size; \
132 } while (h); \
133 } while (0)
// Row loop, one quadword per row, combining four neighbours per output
// byte: a row, its one-byte-shifted copy, and the same two values from the
// row line_size bytes further on.
// To keep the sums inside their byte lanes, every input is split in two:
// ph / nph : sum of the two values with the low two bits cleared and
// shifted right by 2 (the upper six bits of each byte)
// pl / npl : sum of the low two bits of the two values
// The stored value is ph + nph + (((pl + npl + 2) >> 2) & 3) per lane,
// i.e. (a + b + c + d + 2) >> 2 for the four contributing bytes.
// NOTE(review): this relies on BYTE_VEC (x) replicating x into every byte
// of a quadword; BYTE_VEC is not defined in this file.
// The shifted copy takes its top byte from pixels[8]. The first row is read
// before the loop and one further row per pass, so h + 1 source rows are
// touched in total.
// Uses pixels, block, line_size and h from the expanding scope; LOAD16 is
// unused. h must be at least 1 on entry.
135 #define OP8_XY2(LOAD,LOAD16,STORE) \
136 do { \
137 uint64_t pl, ph; \
138 uint64_t p1 = LOAD (pixels); \
139 uint64_t p2 = p1 >> 8 | ((uint64_t) pixels[8] << 56); \
141 ph = (((p1 & ~BYTE_VEC (0x03)) >> 2) + \
142 ((p2 & ~BYTE_VEC (0x03)) >> 2)); \
143 pl = ((p1 & BYTE_VEC (0x03)) + \
144 (p2 & BYTE_VEC (0x03))); \
146 do { \
147 uint64_t npl, nph; \
149 pixels += line_size; \
150 p1 = LOAD (pixels); \
151 p2 = (p1 >> 8) | ((uint64_t) pixels[8] << 56); \
152 nph = (((p1 & ~BYTE_VEC (0x03)) >> 2) + \
153 ((p2 & ~BYTE_VEC (0x03)) >> 2)); \
154 npl = ((p1 & BYTE_VEC (0x03)) + \
155 (p2 & BYTE_VEC (0x03))); \
157 STORE (ph + nph + \
158 (((pl + npl + BYTE_VEC (0x02)) >> 2) & \
159 BYTE_VEC (0x03)), block); \
161 block += line_size; \
162 pl = npl; \
163 ph = nph; \
164 } while (--h); \
165 } while (0)
// Two-quadword variant of the four-neighbour combination.
// LOAD16 fetches p0 (left) and p2 (right) from pixels; p1 and p3 are their
// one-byte-shifted copies, with p1 taking its top byte from the low byte
// of p2 and p3 taking its top byte from pixels[16].
// As in the one-quadword case, each value is split into an upper part
// (low two bits cleared, shifted right by 2) and a lower part (low two
// bits), summed separately as ph_* / pl_* for the current row and
// nph_* / npl_* for the next row. Per lane the stored value is
// ph + nph + (((pl + npl + 2) >> 2) & 3), i.e. (a + b + c + d + 2) >> 2.
// NOTE(review): this relies on BYTE_VEC (x) replicating x into every byte
// of a quadword; BYTE_VEC is not defined in this file.
// The left result goes to STORE for block, the right one for block + 8.
// The first row is read before the loop and one further row per pass, so
// h + 1 source rows of 17 bytes are touched in total.
// Uses pixels, block, line_size and h from the expanding scope; LOAD is
// unused. h must be at least 1 on entry.
167 #define OP16_XY2(LOAD,LOAD16,STORE) \
168 do { \
169 uint64_t p0, p1, p2, p3, pl_l, ph_l, pl_r, ph_r; \
170 LOAD16 (p0, p2, pixels); \
171 p1 = p0 >> 8 | (p2 << 56); \
172 p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56); \
174 ph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) + \
175 ((p1 & ~BYTE_VEC (0x03)) >> 2)); \
176 pl_l = ((p0 & BYTE_VEC (0x03)) + \
177 (p1 & BYTE_VEC(0x03))); \
178 ph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) + \
179 ((p3 & ~BYTE_VEC (0x03)) >> 2)); \
180 pl_r = ((p2 & BYTE_VEC (0x03)) + \
181 (p3 & BYTE_VEC (0x03))); \
183 do { \
184 uint64_t npl_l, nph_l, npl_r, nph_r; \
186 pixels += line_size; \
187 LOAD16 (p0, p2, pixels); \
188 p1 = p0 >> 8 | (p2 << 56); \
189 p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56); \
190 nph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) + \
191 ((p1 & ~BYTE_VEC (0x03)) >> 2)); \
192 npl_l = ((p0 & BYTE_VEC (0x03)) + \
193 (p1 & BYTE_VEC (0x03))); \
194 nph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) + \
195 ((p3 & ~BYTE_VEC (0x03)) >> 2)); \
196 npl_r = ((p2 & BYTE_VEC (0x03)) + \
197 (p3 & BYTE_VEC (0x03))); \
199 STORE (ph_l + nph_l + \
200 (((pl_l + npl_l + BYTE_VEC (0x02)) >> 2) & \
201 BYTE_VEC(0x03)), block); \
202 STORE (ph_r + nph_r + \
203 (((pl_r + npl_r + BYTE_VEC (0x02)) >> 2) & \
204 BYTE_VEC(0x03)), block + 8); \
206 block += line_size; \
207 pl_l = npl_l; \
208 ph_l = nph_l; \
209 pl_r = npl_r; \
210 ph_r = nph_r; \
211 } while (--h); \
212 } while (0)
// Define one static function MC_<OPNAME>_<SUFF>_<SIZE>_alpha (block,
// pixels, line_size, h).
// OPKIND is one of the OP* row-loop macros and STORE the store macro it
// should use. The function tests the low three bits of the pixels address:
// if any is set, OPKIND is expanded with the unaligned loaders
// (uldq, ULOAD16), otherwise with the aligned ones (ldq, ALOAD16).
// The address is converted through uintptr_t, the integer type intended
// for holding pointer values.
// No trailing semicolon is needed after an expansion, since it is a
// complete function definition.
214 #define MAKE_OP(OPNAME,SIZE,SUFF,OPKIND,STORE) \
215 static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha \
216 (uint8_t *restrict block, const uint8_t *restrict pixels, \
217 int line_size, int h) \
218 { \
219 if ((uintptr_t) pixels & 0x7) { \
220 OPKIND (uldq, ULOAD16, STORE); \
221 } else { \
222 OPKIND (ldq, ALOAD16, STORE); \
223 } \
224 }
// Instantiate the eight functions for one store flavour: sizes 8 and 16,
// each with the suffixes o, x, y and xy (OP*, OP*_X2, OP*_Y2, OP*_XY2).
// Every MAKE_OP expands to a complete function definition, so the entries
// are not separated by semicolons; the only ';' left at file scope is the
// one written after the PIXOP (...) invocation itself.
226 #define PIXOP(OPNAME,STORE) \
227 MAKE_OP (OPNAME, 8, o, OP8, STORE) \
228 MAKE_OP (OPNAME, 8, x, OP8_X2, STORE) \
229 MAKE_OP (OPNAME, 8, y, OP8_Y2, STORE) \
230 MAKE_OP (OPNAME, 8, xy, OP8_XY2, STORE) \
231 MAKE_OP (OPNAME, 16, o, OP16, STORE) \
232 MAKE_OP (OPNAME, 16, x, OP16_X2, STORE) \
233 MAKE_OP (OPNAME, 16, y, OP16_Y2, STORE) \
234 MAKE_OP (OPNAME, 16, xy, OP16_XY2, STORE)
// "put" flavour: STORE writes the computed quadword l straight to b.
236 #define STORE(l,b) stq (l, b)
237 PIXOP (put, STORE);
238 #undef STORE
// "avg" flavour: STORE writes avg2 of l and the quadword already at b.
// Like the put definition it carries no trailing ';' -- every use inside
// the OP* macros supplies its own.
239 #define STORE(l,b) stq (avg2 (l, ldq (b)), b)
240 PIXOP (avg, STORE);
// Table of the functions generated above: the first brace group holds the
// put variants, the second the avg variants, each ordered o, x, y, xy for
// size 16 followed by o, x, y, xy for size 8.
// NOTE(review): the field layout of mpeg2_mc_t is declared outside this
// file (presumably mpeg2_internal.h) -- confirm the order matches.
242 mpeg2_mc_t mpeg2_mc_alpha = {
243 { MC_put_o_16_alpha, MC_put_x_16_alpha,
244 MC_put_y_16_alpha, MC_put_xy_16_alpha,
245 MC_put_o_8_alpha, MC_put_x_8_alpha,
246 MC_put_y_8_alpha, MC_put_xy_8_alpha },
247 { MC_avg_o_16_alpha, MC_avg_x_16_alpha,
248 MC_avg_y_16_alpha, MC_avg_xy_16_alpha,
249 MC_avg_o_8_alpha, MC_avg_x_8_alpha,
250 MC_avg_y_8_alpha, MC_avg_xy_8_alpha }
251 };
253 #endif