FS#8961 - Anti-Aliased Fonts.
[kugel-rb.git] / apps / codecs / demac / libdemac / vector_math16_armv6.h
blob61471103bdc8b1d6aec287d090e9df988e7a157a
/*

libdemac - A Monkey's Audio decoder

$Id$

Copyright (C) Dave Chapman 2007

ARMv6 vector math copyright (C) 2008 Jens Arnold

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA

*/
27 /* This version fetches data as 32 bit words, and *requires* v1 to be
28 * 32 bit aligned, otherwise it will result either in a data abort, or
29 * incorrect results (if ARM aligncheck is disabled). */
30 static inline void vector_add(int16_t* v1, int16_t* v2)
32 #if ORDER > 32
33 int cnt = ORDER>>5;
34 #endif
36 #if ORDER > 16
37 #define ADD_SUB_BLOCKS "4"
38 #else
39 #define ADD_SUB_BLOCKS "2"
40 #endif
42 asm volatile (
43 "tst %[v2], #2 \n"
44 "beq 20f \n"
46 "10: \n"
47 "bic %[v2], %[v2], #2 \n"
48 "ldmia %[v2]!, {r4-r5} \n"
49 "1: \n"
50 ".rept " ADD_SUB_BLOCKS "\n"
51 "ldmia %[v2]!, {r6-r7} \n"
52 "ldmia %[v1], {r0-r3} \n"
53 "mov r5, r5, ror #16 \n"
54 "pkhtb r4, r5, r4, asr #16 \n"
55 "sadd16 r0, r0, r4 \n"
56 "pkhbt r5, r5, r6, lsl #16 \n"
57 "sadd16 r1, r1, r5 \n"
58 "ldmia %[v2]!, {r4-r5} \n"
59 "mov r7, r7, ror #16 \n"
60 "pkhtb r6, r7, r6, asr #16 \n"
61 "sadd16 r2, r2, r6 \n"
62 "pkhbt r7, r7, r4, lsl #16 \n"
63 "sadd16 r3, r3, r7 \n"
64 "stmia %[v1]!, {r0-r3} \n"
65 ".endr \n"
66 #if ORDER > 32
67 "subs %[cnt], %[cnt], #1 \n"
68 "bne 1b \n"
69 #endif
70 "b 99f \n"
72 "20: \n"
73 "1: \n"
74 ".rept " ADD_SUB_BLOCKS "\n"
75 "ldmia %[v2]!, {r4-r7} \n"
76 "ldmia %[v1], {r0-r3} \n"
77 "sadd16 r0, r0, r4 \n"
78 "sadd16 r1, r1, r5 \n"
79 "sadd16 r2, r2, r6 \n"
80 "sadd16 r3, r3, r7 \n"
81 "stmia %[v1]!, {r0-r3} \n"
82 ".endr \n"
83 #if ORDER > 32
84 "subs %[cnt], %[cnt], #1 \n"
85 "bne 1b \n"
86 #endif
88 "99: \n"
89 : /* outputs */
90 #if ORDER > 32
91 [cnt]"+r"(cnt),
92 #endif
93 [v1] "+r"(v1),
94 [v2] "+r"(v2)
95 : /* inputs */
96 : /* clobbers */
97 "r0", "r1", "r2", "r3", "r4",
98 "r5", "r6", "r7", "memory"
102 /* This version fetches data as 32 bit words, and *requires* v1 to be
103 * 32 bit aligned, otherwise it will result either in a data abort, or
104 * incorrect results (if ARM aligncheck is disabled). */
105 static inline void vector_sub(int16_t* v1, int16_t* v2)
107 #if ORDER > 32
108 int cnt = ORDER>>5;
109 #endif
111 asm volatile (
112 "tst %[v2], #2 \n"
113 "beq 20f \n"
115 "10: \n"
116 "bic %[v2], %[v2], #2 \n"
117 "ldmia %[v2]!, {r4-r5} \n"
118 "1: \n"
119 ".rept " ADD_SUB_BLOCKS "\n"
120 "ldmia %[v2]!, {r6-r7} \n"
121 "ldmia %[v1], {r0-r3} \n"
122 "mov r5, r5, ror #16 \n"
123 "pkhtb r4, r5, r4, asr #16 \n"
124 "ssub16 r0, r0, r4 \n"
125 "pkhbt r5, r5, r6, lsl #16 \n"
126 "ssub16 r1, r1, r5 \n"
127 "ldmia %[v2]!, {r4-r5} \n"
128 "mov r7, r7, ror #16 \n"
129 "pkhtb r6, r7, r6, asr #16 \n"
130 "ssub16 r2, r2, r6 \n"
131 "pkhbt r7, r7, r4, lsl #16 \n"
132 "ssub16 r3, r3, r7 \n"
133 "stmia %[v1]!, {r0-r3} \n"
134 ".endr \n"
135 #if ORDER > 32
136 "subs %[cnt], %[cnt], #1 \n"
137 "bne 1b \n"
138 #endif
139 "b 99f \n"
141 "20: \n"
142 "1: \n"
143 ".rept " ADD_SUB_BLOCKS "\n"
144 "ldmia %[v2]!, {r4-r7} \n"
145 "ldmia %[v1], {r0-r3} \n"
146 "ssub16 r0, r0, r4 \n"
147 "ssub16 r1, r1, r5 \n"
148 "ssub16 r2, r2, r6 \n"
149 "ssub16 r3, r3, r7 \n"
150 "stmia %[v1]!, {r0-r3} \n"
151 ".endr \n"
152 #if ORDER > 32
153 "subs %[cnt], %[cnt], #1 \n"
154 "bne 1b \n"
155 #endif
157 "99: \n"
158 : /* outputs */
159 #if ORDER > 32
160 [cnt]"+r"(cnt),
161 #endif
162 [v1] "+r"(v1),
163 [v2] "+r"(v2)
164 : /* inputs */
165 : /* clobbers */
166 "r0", "r1", "r2", "r3", "r4",
167 "r5", "r6", "r7", "memory"
171 /* This version fetches data as 32 bit words, and *requires* v1 to be
172 * 32 bit aligned, otherwise it will result either in a data abort, or
173 * incorrect results (if ARM aligncheck is disabled). */
174 static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
176 int res;
177 #if ORDER > 32
178 int cnt = ORDER>>5;
179 #endif
181 #if ORDER > 16
182 #define MLA_BLOCKS "3"
183 #else
184 #define MLA_BLOCKS "1"
185 #endif
187 asm volatile (
188 #if ORDER > 32
189 "mov %[res], #0 \n"
190 #endif
191 "tst %[v2], #2 \n"
192 "beq 20f \n"
194 "10: \n"
195 "bic %[v2], %[v2], #2 \n"
196 "ldmia %[v2]!, {r5-r7} \n"
197 "ldmia %[v1]!, {r0-r1} \n"
198 "1: \n"
199 "pkhbt r8, r6, r5 \n"
200 "ldmia %[v2]!, {r4-r5} \n"
201 #if ORDER > 32
202 "smladx %[res], r0, r8, %[res] \n"
203 #else
204 "smuadx %[res], r0, r8 \n"
205 #endif
206 ".rept " MLA_BLOCKS "\n"
207 "pkhbt r8, r7, r6 \n"
208 "ldmia %[v1]!, {r2-r3} \n"
209 "smladx %[res], r1, r8, %[res] \n"
210 "pkhbt r8, r4, r7 \n"
211 "ldmia %[v2]!, {r6-r7} \n"
212 "smladx %[res], r2, r8, %[res] \n"
213 "pkhbt r8, r5, r4 \n"
214 "ldmia %[v1]!, {r0-r1} \n"
215 "smladx %[res], r3, r8, %[res] \n"
216 "pkhbt r8, r6, r5 \n"
217 "ldmia %[v2]!, {r4-r5} \n"
218 "smladx %[res], r0, r8, %[res] \n"
219 ".endr \n"
221 "pkhbt r8, r7, r6 \n"
222 "ldmia %[v1]!, {r2-r3} \n"
223 "smladx %[res], r1, r8, %[res] \n"
224 "pkhbt r8, r4, r7 \n"
225 #if ORDER > 32
226 "subs %[cnt], %[cnt], #1 \n"
227 "ldmneia %[v2]!, {r6-r7} \n"
228 "smladx %[res], r2, r8, %[res] \n"
229 "pkhbt r8, r5, r4 \n"
230 "ldmneia %[v1]!, {r0-r1} \n"
231 "smladx %[res], r3, r8, %[res] \n"
232 "bne 1b \n"
233 #else
234 "pkhbt r5, r5, r4 \n"
235 "smladx %[res], r2, r8, %[res] \n"
236 "smladx %[res], r3, r5, %[res] \n"
237 #endif
238 "b 99f \n"
240 "20: \n"
241 "ldmia %[v1]!, {r0-r1} \n"
242 "ldmia %[v2]!, {r5-r7} \n"
243 "1: \n"
244 "ldmia %[v1]!, {r2-r3} \n"
245 #if ORDER > 32
246 "smlad %[res], r0, r5, %[res] \n"
247 #else
248 "smuad %[res], r0, r5 \n"
249 #endif
250 ".rept " MLA_BLOCKS "\n"
251 "ldmia %[v2]!, {r4-r5} \n"
252 "smlad %[res], r1, r6, %[res] \n"
253 "ldmia %[v1]!, {r0-r1} \n"
254 "smlad %[res], r2, r7, %[res] \n"
255 "ldmia %[v2]!, {r6-r7} \n"
256 "smlad %[res], r3, r4, %[res] \n"
257 "ldmia %[v1]!, {r2-r3} \n"
258 "smlad %[res], r0, r5, %[res] \n"
259 ".endr \n"
261 "ldmia %[v2]!, {r4-r5} \n"
262 "smlad %[res], r1, r6, %[res] \n"
263 #if ORDER > 32
264 "subs %[cnt], %[cnt], #1 \n"
265 "ldmneia %[v1]!, {r0-r1} \n"
266 "smlad %[res], r2, r7, %[res] \n"
267 "ldmneia %[v2]!, {r6-r7} \n"
268 "smlad %[res], r3, r4, %[res] \n"
269 "bne 1b \n"
270 #else
271 "smlad %[res], r2, r7, %[res] \n"
272 "smlad %[res], r3, r4, %[res] \n"
273 #endif
275 "99: \n"
276 : /* outputs */
277 #if ORDER > 32
278 [cnt]"+r"(cnt),
279 #endif
280 [v1] "+r"(v1),
281 [v2] "+r"(v2),
282 [res]"=r"(res)
283 : /* inputs */
284 : /* clobbers */
285 "r0", "r1", "r2", "r3", "r4",
286 "r5", "r6", "r7", "r8"
288 return res;